docs: add release notes for 17.1.1

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Update version to 17.1.1
2017-05-25 08:11:42 +01:00 · 2017-05-25 08:06:23 +01:00 · 2017-05-22 10:19:30 +01:00 · 2017-05-22 10:19:04 +01:00 · 2017-05-19 23:23:21 +01:00 · 2017-05-19 23:17:29 +01:00
53 changed files with 711 additions and 289 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -116,7 +116,3 @@ endif

 # Quiet down the build system and remove any .h files from the sources
 LOCAL_SRC_FILES := $(patsubst %.h, , $(LOCAL_SRC_FILES))
-
-ifneq ($(LOCAL_IS_HOST_MODULE),true)
-LOCAL_SHARED_LIBRARIES += libz
-endif
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-17.1.0
+17.1.1
--- a/bin/get-extra-pick-list.sh
+++ b/bin/get-extra-pick-list.sh
@@ -30,7 +30,15 @@ do
 		if grep -q ^$candidate already_picked ; then
 			continue
 		fi
-		echo Commit $candidate references $sha
+		# Or if it isn't in the ignore list.
+		if [ -f bin/.cherry-ignore ] ; then
+			if grep -q ^$candidate bin/.cherry-ignore ; then
+				continue
+			fi
+		fi
+		printf "Commit \"%s\" references %s\n" \
+		       "`git log -n1 --pretty=oneline $candidate`" \
+		       "$sha"
 	done
 done

--- a/bin/get-fixes-pick-list.sh
+++ b/bin/get-fixes-pick-list.sh
@@ -24,35 +24,52 @@ git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |
 git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
 while read sha
 do
-	# For each one try to extract the tag
-	fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
-	if [ "x$fixes_count" != x1 ] ; then
-		echo WARNING: Commit $sha has more than one Fixes tag
+	# Check to see whether the patch is on the ignore list ...
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q ^$sha bin/.cherry-ignore ; then
+			continue
+		fi
 	fi
-	fixes=`git show $sha | grep -i "fixes:" | head -n 1`
-	# The following sed/cut combination is borrowed from GregKH
-	id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`

-	# Bail out if we cannot find suitable id.
-	# Any specific validation the $id is valid and not some junk, is
-	# implied with the follow up code
-	if [ "x$id" = x ] ; then
+	# Skip if it has been already cherry-picked.
+	if grep -q ^$sha already_picked ; then
 		continue
 	fi

-	# Check if the offending commit is in branch.
+	# For each one try to extract the tag
+	fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
+	warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
+	while [ $fixes_count -gt 0 ] ; do
+		fixes=`git show $sha | grep -i "fixes:" | tail -n $fixes_count | head -n 1`
+		fixes_count=$(($fixes_count-1))
+		# The following sed/cut combination is borrowed from GregKH
+		id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`

-	# Be that cherry-picked ...
-	# ... or landed before the branchpoint.
-	if grep -q ^$id already_picked ||
-	   grep -q ^$id already_landed ; then
-
-		# Finally nominate the fix if it hasn't landed yet.
-		if grep -q ^$sha already_picked ; then
+		# Bail out if we cannot find suitable id.
+		# Any specific validation the $id is valid and not some junk, is
+		# implied with the follow up code
+		if [ "x$id" = x ] ; then
 			continue
 		fi

-		echo Commit $sha fixes $id
+		# Check if the offending commit is in branch.
+
+		# Be that cherry-picked ...
+		# ... or landed before the branchpoint.
+		if grep -q ^$id already_picked ||
+		   grep -q ^$id already_landed ; then
+
+			printf "Commit \"%s\" fixes %s\n" \
+			       "`git log -n1 --pretty=oneline $sha`" \
+			       "$id"
+			warn=$(($warn-1))
+		fi
+
+	done
+
+	if [ $warn -gt 0 ] ; then
+		printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
+		       "`git log -n1 --pretty=oneline $sha`"
 	fi

 done
--- a/configure.ac
+++ b/configure.ac
@@ -1079,14 +1079,9 @@ fi

 if test "x$LIBUNWIND" = "xyes"; then
    PKG_CHECK_MODULES(LIBUNWIND, libunwind)
-    if test "x$HAVE_LIBUNWIND" != "xyes"; then
-        AC_MSG_ERROR([libunwind requested but not installed.])
-    fi
    AC_DEFINE(HAVE_LIBUNWIND, 1, [Have libunwind support])
 fi

-AM_CONDITIONAL(HAVE_LIBUNWIND, [test "x$LIBUNWIND" = xyes])
-

 dnl Options for APIs
 AC_ARG_ENABLE([opengl],
@@ -2477,6 +2472,10 @@ if test -n "$with_gallium_drivers"; then
    done
 fi

+# XXX: Keep in sync with LLVM_REQUIRED_SWR
+AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x3.9.0 -a \
+                                              "x$LLVM_VERSION" != x3.9.1)
+
 if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
    llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
    llvm_add_default_components "gallium"
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+c388069581a72853161657ac365f2c083afabd7cffd53f80513dacfa1cfa58a8  mesa-17.1.0.tar.gz
+cf234a6ed4764673886b6661553b54675776ef0898f774716173cec890ac3b17  mesa-17.1.0.tar.xz
 </pre>


--- a/docs/relnotes/17.1.1.html
+++ b/docs/relnotes/17.1.1.html
@@ -0,0 +1,187 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.1.1 Release Notes / May 25, 2017</h1>
+
+<p>
+Mesa 17.1.1 is a bug fix release which fixes bugs found since the 17.1.0 release.
+</p>
+<p>
+Mesa 17.1.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new vega10 pci ids</li>
+</ul>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>bin/get-fixes-pick-list.sh: don't warn if more than one, go over them</li>
+  <li>bin/get-fixes-pick-list.sh: bring back the warning</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>swr: move msaa resolve to generalized StoreTile</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>Android: correct libz dependency</li>
+</ul>
+
+<p>Daniel Stone (1):</p>
+<ul>
+  <li>gbm/dri: Fix sign-extension in modifier query</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.1.0</li>
+  <li>radeon: automake: remove unneeded elf Cflags/Libs</li>
+  <li>configure: remove unneeded bits around libunwind handling</li>
+  <li>egl: add g_egldispatchstubs.h to the release tarball</li>
+  <li>automake: add SWR LLVM gen_builder.hpp workaround</li>
+  <li>Update version to 17.1.1</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>renderonly: Initialize fields of struct winsys_handle.</li>
+  <li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
+</ul>
+
+<p>Grazvydas Ignotas (2):</p>
+<ul>
+  <li>anv: fix possible stack corruption</li>
+  <li>anv: don't leak DRM devices</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0/ir: SHLADD's middle source must be an immediate</li>
+</ul>
+
+<p>Johnson Lin (1):</p>
+<ul>
+  <li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>bin/get-{extra,fixes}-pick-list.sh: add support for ignore list</li>
+  <li>bin/get-{extra,fixes}-pick-list.sh: improve output</li>
+</ul>
+
+<p>Lucas Stach (2):</p>
+<ul>
+  <li>etnaviv: stop oversizing buffer resources</li>
+  <li>etnaviv: allow R/B swapped surfaces to be cleared</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>amd/addrlib: import Raven support</li>
+  <li>radeonsi/gfx9: add support for Raven</li>
+</ul>
+
+<p>Nanley Chery (2):</p>
+<ul>
+  <li>anv/formats: Update the three-channel BC1 mappings</li>
+  <li>i965/formats: Update the three-channel DXT1 mappings</li>
+</ul>
+
+<p>Nicolai Hähnle (5):</p>
+<ul>
+  <li>radeonsi: mark fast-cleared textures as compressed when dirtying</li>
+  <li>radeonsi: fix primitive ID in fragment shader when using tessellation</li>
+  <li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
+  <li>radeonsi: fix gl_PrimitiveIDIn in geometry shader when using tessellation</li>
+  <li>st/mesa: remove an incorrect assertion</li>
+</ul>
+
+<p>Pohjolainen, Topi (1):</p>
+<ul>
+  <li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>mesa/st: fix yuv EGLImage's</li>
+  <li>freedreno: fix crash when flush() but no rendering</li>
+</ul>
+
+<p>Rob Herring (1):</p>
+<ul>
+  <li>virgl: fix virgl_bo_transfer_{put, get} box struct copy</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (3):</p>
+<ul>
+  <li>i965/vec4/gs: restore the uniform values which was overwritten by failed vec4_gs_visitor execution</li>
+  <li>i965/vec4: fix swizzle and writemask when loading an uniform with constant offset</li>
+  <li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,6 +219,10 @@ CHIPSET(0x6860, VEGA10_, VEGA10)
 CHIPSET(0x6861, VEGA10_, VEGA10)
 CHIPSET(0x6862, VEGA10_, VEGA10)
 CHIPSET(0x6863, VEGA10_, VEGA10)
+CHIPSET(0x6864, VEGA10_, VEGA10)
 CHIPSET(0x6867, VEGA10_, VEGA10)
+CHIPSET(0x6868, VEGA10_, VEGA10)
 CHIPSET(0x687F, VEGA10_, VEGA10)
 CHIPSET(0x686C, VEGA10_, VEGA10)
+
+CHIPSET(0x15DD, RAVEN_, RAVEN)
--- a/src/amd/Makefile.common.am
+++ b/src/amd/Makefile.common.am
@@ -65,6 +65,8 @@ common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
 endif
 endif

+common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
+
 common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h
 	$(AM_V_at)$(MKDIR_P) $(@D)
 	$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -1193,6 +1193,20 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
            m_settings.depthPipeXorDisable = 1;
            break;

+        case FAMILY_RV:
+            m_settings.isArcticIsland = 1;
+            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
+
+            if (m_settings.isRaven)
+            {
+                m_settings.isDcn1   = 1;
+            }
+
+            m_settings.metaBaseAlignFix = 1;
+
+            m_settings.depthPipeXorDisable = 1;
+            break;
+
        default:
            ADDR_ASSERT(!"This should be a Fusion");
            break;
@@ -2734,6 +2748,35 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
                break;
        }
    }
+    else if (m_settings.isDcn1)
+    {
+        switch (swizzleMode)
+        {
+            case ADDR_SW_4KB_D:
+            case ADDR_SW_64KB_D:
+            case ADDR_SW_VAR_D:
+            case ADDR_SW_64KB_D_T:
+            case ADDR_SW_4KB_D_X:
+            case ADDR_SW_64KB_D_X:
+            case ADDR_SW_VAR_D_X:
+                support = (pIn->bpp == 64);
+                break;
+
+            case ADDR_SW_LINEAR:
+            case ADDR_SW_4KB_S:
+            case ADDR_SW_64KB_S:
+            case ADDR_SW_VAR_S:
+            case ADDR_SW_64KB_S_T:
+            case ADDR_SW_4KB_S_X:
+            case ADDR_SW_64KB_S_X:
+            case ADDR_SW_VAR_S_X:
+                support = (pIn->bpp <= 64);
+                break;
+
+            default:
+                break;
+        }
+    }
    else
    {
        ADDR_NOT_IMPLEMENTED();
@@ -3195,6 +3238,20 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
                        // DCE12 does not support display surface to be _T swizzle mode
                        prtXor = FALSE;
                    }
+                    else if (m_settings.isDcn1)
+                    {
+                        // _R is not supported by Dcn1
+                        if (pIn->bpp == 64)
+                        {
+                            swType = ADDR_SW_D;
+                        }
+                        else
+                        {
+                            swType = ADDR_SW_S;
+                        }
+
+                        blockSet.micro = FALSE;
+                    }
                    else
                    {
                        ADDR_NOT_IMPLEMENTED();
--- a/src/amd/addrlib/gfx9/gfx9addrlib.h
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.h
@@ -54,11 +54,13 @@ struct Gfx9ChipSettings
        // Asic/Generation name
        UINT_32 isArcticIsland      : 1;
        UINT_32 isVega10            : 1;
-        UINT_32 reserved0           : 30;
+        UINT_32 isRaven             : 1;
+        UINT_32 reserved0           : 29;

        // Display engine IP version name
        UINT_32 isDce12             : 1;
-        UINT_32 reserved1           : 31;
+        UINT_32 isDcn1              : 1;
+        UINT_32 reserved1           : 30;

        // Misc configuration bits
        UINT_32 metaBaseAlignFix    : 1;
@@ -201,7 +203,7 @@ protected:

        if (IsXor(swizzleMode))
        {
-            if (m_settings.isVega10)
+            if (m_settings.isVega10 || m_settings.isRaven)
            {
                baseAlign = GetBlockSize(swizzleMode);
            }
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -93,6 +93,7 @@ enum radeon_family {
    CHIP_POLARIS11,
    CHIP_POLARIS12,
    CHIP_VEGA10,
+    CHIP_RAVEN,
    CHIP_LAST,
 };

--- a/src/amd/common/amdgpu_id.h
+++ b/src/amd/common/amdgpu_id.h
@@ -49,6 +49,7 @@ enum {
 	FAMILY_CZ,
 	FAMILY_PI,
 	FAMILY_AI,
+	FAMILY_RV,
 	FAMILY_LAST,
 };

@@ -185,4 +186,13 @@ enum {
 #define ASICREV_IS_VEGA10_P(eChipRev) \
   ((eChipRev) >= AI_VEGA10_P_A0 && (eChipRev) < AI_UNKNOWN)

+/* RV specific rev IDs */
+enum {
+   RAVEN_A0      = 0x01,
+   RAVEN_UNKNOWN = 0xFF
+};
+
+#define ASICREV_IS_RAVEN(eChipRev) \
+   ((eChipRev) >= RAVEN_A0 && (eChipRev) < RAVEN_UNKNOWN)
+
 #endif /* AMDGPU_ID_H */
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -244,9 +244,9 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
-                        nir_fadd(b, y, nir_imm_float(b, -0.0625f))),
-               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -0.5f)), 0),
-               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -0.5f)), 0),
+                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
+               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
+               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -138,7 +138,8 @@ libEGL_mesa_la_SOURCES = \
 	main/eglglvnd.c \
 	main/egldispatchstubs.h \
 	main/egldispatchstubs.c \
-	g_egldispatchstubs.c
+	g_egldispatchstubs.c \
+	g_egldispatchstubs.h
 libEGL_mesa_la_LIBADD = libEGL_common.la
 libEGL_mesa_la_LDFLAGS = \
 	-no-undefined \
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -828,25 +828,6 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,
         RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
   }

-   _EGLThreadInfo *t =_eglGetCurrentThread();
-   _EGLContext *old_ctx = t->CurrentContext;
-   _EGLSurface *old_draw_surf = old_ctx ? old_ctx->DrawSurface : NULL;
-   _EGLSurface *old_read_surf = old_ctx ? old_ctx->ReadSurface : NULL;
-
-   /* From the EGL 1.5 spec, Section 3.7.3 Binding Context and Drawables:
-    *
-    *    If the previous context of the calling thread has unflushed commands,
-    *    and the previous surface is no longer valid, an
-    *    EGL_BAD_CURRENT_SURFACE error is generated.
-    *
-    * It's difficult to check if the context has unflushed commands, but it's
-    * easy to check if the surface is no longer valid.
-    */
-   if (old_draw_surf && old_draw_surf->Lost)
-      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
-   if (old_read_surf && old_read_surf->Lost)
-      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
-
   /*    If a native window underlying either draw or read is no longer valid,
    *    an EGL_BAD_NATIVE_WINDOW error is generated.
    */
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -125,19 +125,6 @@ create_pass_manager(struct gallivm_state *gallivm)
   LLVMAddTargetData(gallivm->target, gallivm->passmgr);
 #endif

-   /* Setting the module's DataLayout to an empty string will cause the
-    * ExecutionEngine to copy to the DataLayout string from its target
-    * machine to the module.  As of LLVM 3.8 the module and the execution
-    * engine are required to have the same DataLayout.
-    *
-    * TODO: This is just a temporary work-around.  The correct solution is
-    * for gallivm_init_state() to create a TargetMachine and pull the
-    * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
-    * is being implicitly created by the EngineBuilder in
-    * lp_build_create_jit_compiler_for_module()
-    */
-
-#if HAVE_LLVM < 0x0308
   {
      char *td_str;
      // New ones from the Module.
@@ -145,9 +132,6 @@ create_pass_manager(struct gallivm_state *gallivm)
      LLVMSetDataLayout(gallivm->module, td_str);
      free(td_str);
   }
-#else
-   LLVMSetDataLayout(gallivm->module, "");
-#endif

   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
@@ -628,6 +612,24 @@ gallivm_compile_module(struct gallivm_state *gallivm)
   }

   if (use_mcjit) {
+      /* Setting the module's DataLayout to an empty string will cause the
+       * ExecutionEngine to copy to the DataLayout string from its target
+       * machine to the module.  As of LLVM 3.8 the module and the execution
+       * engine are required to have the same DataLayout.
+       *
+       * We must make sure we do this after running the optimization passes,
+       * because those passes need a correct datalayout string.  For example,
+       * if those optimization passes see an empty datalayout, they will assume
+       * this is a little endian target and will do optimizations that break big
+       * endian machines.
+       *
+       * TODO: This is just a temporary work-around.  The correct solution is
+       * for gallivm_init_state() to create a TargetMachine and pull the
+       * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
+       * is being implicitly created by the EngineBuilder in
+       * lp_build_create_jit_compiler_for_module()
+       */
+      LLVMSetDataLayout(gallivm->module, "");
      assert(!gallivm->engine);
      if (!init_gallivm_engine(gallivm)) {
         assert(0);
--- a/src/gallium/auxiliary/renderonly/renderonly.c
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -117,6 +117,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
   }

   /* import dumb buffer */
+   memset(&handle, 0, sizeof(handle));
   handle.type = DRM_API_HANDLE_TYPE_FD;
   handle.handle = prime_fd;
   handle.stride = create_dumb.pitch;
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -180,7 +180,7 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
                        &paddingY, &halign);
   assert(paddingX && paddingY);

-   if (templat->bind != PIPE_BUFFER) {
+   if (templat->target != PIPE_BUFFER) {
      unsigned min_paddingY = 4 * screen->specs.pixel_pipes;
      if (paddingY < min_paddingY)
         paddingY = min_paddingY;
--- a/src/gallium/drivers/etnaviv/etnaviv_translate.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_translate.h
@@ -416,6 +416,8 @@ translate_clear_color(enum pipe_format format,
   switch (format) {
   case PIPE_FORMAT_B8G8R8A8_UNORM:
   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
      clear_value = etna_cfloat_to_uintN(color->f[2], 8) |
                    (etna_cfloat_to_uintN(color->f[1], 8) << 8) |
                    (etna_cfloat_to_uintN(color->f[0], 8) << 16) |
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -45,6 +45,12 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
 {
 	struct fd_context *ctx = fd_context(pctx);

+	if (!ctx->batch) {
+		if (fence)
+			*fence = NULL;
+		return;
+	}
+
 	if (flags & PIPE_FLUSH_FENCE_FD)
 		ctx->batch->needs_out_fence_fd = true;

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -642,6 +642,8 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
   for (int s = 0; i->srcExists(s); ++s) {
      if (s == 2 && i->op == OP_SUCLAMP)
         continue;
+      if (s == 1 && i->op == OP_SHLADD)
+         continue;
      ImmediateValue *imm = i->getSrc(s)->asImm();
      if (imm) {
         if (i->op == OP_SELP && s == 2) {
--- a/src/gallium/drivers/radeon/Makefile.am
+++ b/src/gallium/drivers/radeon/Makefile.am
@@ -16,13 +16,11 @@ libradeon_la_SOURCES = \
 if HAVE_GALLIUM_LLVM

 AM_CFLAGS += \
-	$(LLVM_CFLAGS) \
-	$(LIBELF_CFLAGS)
+	$(LLVM_CFLAGS)

 libradeon_la_LIBADD = \
 	$(CLOCK_LIB) \
-	$(LLVM_LIBS) \
-	$(LIBELF_LIBS)
+	$(LLVM_LIBS)

 libradeon_la_LDFLAGS = \
 	$(LLVM_LDFLAGS)
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -833,6 +833,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
 	case CHIP_POLARIS12: return "AMD POLARIS12";
 	case CHIP_STONEY: return "AMD STONEY";
 	case CHIP_VEGA10: return "AMD VEGA10";
+	case CHIP_RAVEN: return "AMD RAVEN";
 	default: return "AMD unknown";
 	}
 }
@@ -1006,6 +1007,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_POLARIS12: /* same as polaris11 */
 		return HAVE_LLVM >= 0x0309 ? "polaris11" : "carrizo";
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		return "gfx900";
 	default:
 		return "";
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2720,8 +2720,15 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,

 			vi_dcc_clear_level(rctx, tex, 0, reset_value);

-			if (clear_words_needed)
-				tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+			unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level;
+			if (clear_words_needed) {
+				bool need_compressed_update = !tex->dirty_level_mask;
+
+				tex->dirty_level_mask |= level_bit;
+
+				if (need_compressed_update)
+					p_atomic_inc(&rctx->screen->compressed_colortex_counter);
+			}
 			tex->separate_dcc_dirty = true;
 		} else {
 			/* 128-bit formats are unusupported */
@@ -2744,7 +2751,12 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 					   tex->cmask.offset, tex->cmask.size, 0,
 					   R600_COHERENCY_CB_META);

+			bool need_compressed_update = !tex->dirty_level_mask;
+
 			tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+
+			if (need_compressed_update)
+				p_atomic_inc(&rctx->screen->compressed_colortex_counter);
 		}

 		/* We can change the micro tile mode before a full clear. */
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -759,6 +759,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		sscreen->gs_table_depth = 32;
 		return true;
 	default:
@@ -897,7 +898,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)

 	sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
 					    sscreen->b.family <= CHIP_POLARIS12) ||
-					   sscreen->b.family == CHIP_VEGA10;
+					   sscreen->b.family == CHIP_VEGA10 ||
+					   sscreen->b.family == CHIP_RAVEN;

 	sscreen->b.has_cp_dma = true;
 	sscreen->b.has_streamout = true;
@@ -911,7 +913,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)

 		sscreen->b.rbplus_allowed =
 			!(sscreen->b.debug_flags & DBG_NO_RB_PLUS) &&
-			sscreen->b.family == CHIP_STONEY;
+			(sscreen->b.family == CHIP_STONEY ||
+			 sscreen->b.family == CHIP_RAVEN);
 	}

 	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4568,6 +4568,9 @@ static void si_init_config(struct si_context *sctx)
 		case CHIP_VEGA10:
 			pc_lines = 4096;
 			break;
+		case CHIP_RAVEN:
+			pc_lines = 1024;
+			break;
 		default:
 			assert(0);
 		}
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -174,6 +174,20 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	if (sctx->b.chip_class == SI) {
 		unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
 		*num_patches = MIN2(*num_patches, one_wave);
+
+		if (sctx->screen->b.info.max_se == 1) {
+			/* The VGT HS block increments the patch ID unconditionally
+			 * within a single threadgroup. This results in incorrect
+			 * patch IDs when instanced draws are used.
+			 *
+			 * The intended solution is to restrict threadgroups to
+			 * a single instance by setting SWITCH_ON_EOI, which
+			 * should cause IA to split instances up. However, this
+			 * doesn't work correctly on SI when there is no other
+			 * SE to switch to.
+			 */
+			*num_patches = 1;
+		}
 	}

 	sctx->last_num_patches = *num_patches;
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1813,6 +1813,19 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 	r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
 }

+static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
+{
+	sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
+		(sctx->tes_shader.cso &&
+		 sctx->tes_shader.cso->info.uses_primid) ||
+		(sctx->tcs_shader.cso &&
+		 sctx->tcs_shader.cso->info.uses_primid) ||
+		(sctx->gs_shader.cso &&
+		 sctx->gs_shader.cso->info.uses_primid) ||
+		(sctx->ps_shader.cso && !sctx->gs_shader.cso &&
+		 sctx->ps_shader.cso->info.uses_primid);
+}
+
 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -1829,20 +1842,14 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */

-	if (enable_changed)
+	if (enable_changed) {
 		si_shader_change_notify(sctx);
+		if (sctx->ia_multi_vgt_param_key.u.uses_tess)
+			si_update_tcs_tes_uses_prim_id(sctx);
+	}
 	r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
 }

-static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
-{
-	sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
-		(sctx->tes_shader.cso &&
-		 sctx->tes_shader.cso->info.uses_primid) ||
-		(sctx->tcs_shader.cso &&
-		 sctx->tcs_shader.cso->info.uses_primid);
-}
-
 static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -1897,6 +1904,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	sctx->ps_shader.cso = sel;
 	sctx->ps_shader.current = sel ? sel->first_variant : NULL;
 	sctx->do_update_shaders = true;
+	if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
+		si_update_tcs_tes_uses_prim_id(sctx);
 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 }

--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -56,6 +56,7 @@ BUILT_SOURCES = \
 	rasterizer/codegen/gen_knobs.cpp \
 	rasterizer/codegen/gen_knobs.h \
 	rasterizer/jitter/gen_state_llvm.h \
+	rasterizer/jitter/gen_builder.hpp \
 	rasterizer/jitter/gen_builder_x86.hpp \
 	rasterizer/archrast/gen_ar_event.hpp \
 	rasterizer/archrast/gen_ar_event.cpp \
@@ -168,20 +169,6 @@ COMMON_LDFLAGS = \
 	$(LLVM_LDFLAGS)


-# XXX: As we cannot use BUILT_SOURCES (the files will end up in the dist
-# tarball) just annotate the dependency directly.
-# As the single direct user of gen_builder.hpp is a header (builder.h) trace all
-# the translusive users (one that use the latter header).
-rasterizer/jitter/blend_jit.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/builder.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/builder_misc.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/fetch_jit.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/streamout_jit.cpp: rasterizer/jitter/gen_builder.hpp
-swr_shader.cpp: rasterizer/jitter/gen_builder.hpp
-
-CLEANFILES = \
-	rasterizer/jitter/gen_builder.hpp
-
 lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la

 libswrAVX_la_CXXFLAGS = \
@@ -192,14 +179,6 @@ libswrAVX_la_CXXFLAGS = \
 libswrAVX_la_SOURCES = \
 	$(COMMON_SOURCES)

-# XXX: Don't ship these generated sources for now, since they are specific
-# to the LLVM version they are generated from. Thus a release tarball
-# containing the said files, generated against eg. LLVM 3.8 will fail to build
-# on systems with other versions of LLVM eg. 3.7 or 3.6.
-# Move these back to BUILT_SOURCES once that is resolved.
-nodist_libswrAVX_la_SOURCES = \
-	rasterizer/jitter/gen_builder.hpp
-
 libswrAVX_la_LIBADD = \
 	$(COMMON_LIBADD)

@@ -214,14 +193,6 @@ libswrAVX2_la_CXXFLAGS = \
 libswrAVX2_la_SOURCES = \
 	$(COMMON_SOURCES)

-# XXX: Don't ship these generated sources for now, since they are specific
-# to the LLVM version they are generated from. Thus a release tarball
-# containing the said files, generated against eg. LLVM 3.8 will fail to build
-# on systems with other versions of LLVM eg. 3.7 or 3.6.
-# Move these back to BUILT_SOURCES once that is resolved.
-nodist_libswrAVX2_la_SOURCES = \
-	rasterizer/jitter/gen_builder.hpp
-
 libswrAVX2_la_LIBADD = \
 	$(COMMON_LIBADD)

@@ -230,6 +201,16 @@ libswrAVX2_la_LDFLAGS = \

 include $(top_srcdir)/install-gallium-links.mk

+# Generated gen_builder.hpp is not backwards compatible. So ship only one
+# created with the oldest supported version of LLVM ('false' aborts the dist).
+dist-hook:
+if SWR_INVALID_LLVM_VERSION
+	@echo "*******************************************************"
+	@echo "LLVM 3.9.0 or LLVM 3.9.1 required to create the tarball"
+	@echo "*******************************************************"
+	@false
+endif
+
 EXTRA_DIST = \
 	SConscript \
 	rasterizer/archrast/events.proto \
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
            }
        }
    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Resolves an 8x8 raster tile to the resolve destination surface.
+    /// @param pSrc - Pointer to raster tile.
+    /// @param pDstSurface - Destination surface state
+    /// @param x, y - Coordinates to raster tile.
+    /// @param sampleOffset - Offset between adjacent multisamples
+    INLINE static void Resolve(
+        uint8_t *pSrc,
+        SWR_SURFACE_STATE* pDstSurface,
+        uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
+    {
+        uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
+        uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
+
+        float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
+
+        // For each raster tile pixel (rx, ry)
+        for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
+        {
+            for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
+            {
+                // Perform bounds checking.
+                if (((x + rx) < lodWidth) &&
+                        ((y + ry) < lodHeight))
+                {
+                    // Sum across samples
+                    float resolveColor[4] = {0};
+                    for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
+                    {
+                        float sampleColor[4] = {0};
+                        uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
+                        GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
+                        resolveColor[0] += sampleColor[0];
+                        resolveColor[1] += sampleColor[1];
+                        resolveColor[2] += sampleColor[2];
+                        resolveColor[3] += sampleColor[3];
+                    }
+
+                    // Divide by numSamples to average
+                    resolveColor[0] *= oneOverNumSamples;
+                    resolveColor[1] *= oneOverNumSamples;
+                    resolveColor[2] *= oneOverNumSamples;
+                    resolveColor[3] *= oneOverNumSamples;
+
+                    // Use the resolve surface state
+                    SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
+                    uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),
+                        pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex,
+                        0, pResolveSurface->lod, pResolveSurface);
+                    {
+                        ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);
+                    }
+                }
+            }
+        }
+    }
+
 };

 template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
            pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
        }

+        // Save the original pSrcHotTile pointer for the resolve pass below.
+        uint8_t *pResolveSrcHotTile = pSrcHotTile;
+
        // Store each raster tile from the hot tile to the destination surface.
        for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
        {
@@ -2328,6 +2389,20 @@ struct StoreMacroTile
                }
            }
        }
+
+        if (pDstSurface->pAuxBaseAddress)
+        {
+            uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
+            // Resolve each raster tile from the hot tile into the resolve surface.
+            for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
+            {
+                for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
+                {
+                    StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);
+                    pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;
+                }
+            }
+        }
    }
 };

--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -267,65 +267,6 @@ swr_resource_copy(struct pipe_context *pipe,
 }


-/* XXX: This resolve is incomplete and suboptimal. It will be removed once the
- * pipelined resolve blit works. */
-void
-swr_do_msaa_resolve(struct pipe_resource *src_resource,
-                    struct pipe_resource *dst_resource)
-{
-   /* This is a pretty dumb inline resolve.  It only supports 8-bit formats
-    * (ex RGBA8/BGRA8) - which are most common display formats anyway.
-    */
-
-   /* quick check for 8-bit and number of components */
-   uint8_t bits_per_component =
-      util_format_get_component_bits(src_resource->format,
-            UTIL_FORMAT_COLORSPACE_RGB, 0);
-
-   /* Unsupported resolve format */
-   assert(src_resource->format == dst_resource->format);
-   assert(bits_per_component == 8);
-   if ((src_resource->format != dst_resource->format) ||
-       (bits_per_component != 8)) {
-      return;
-   }
-
-   uint8_t src_num_comps = util_format_get_nr_components(src_resource->format);
-
-   SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr;
-   SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr;
-
-   uint32_t *src, *dst, offset;
-   uint32_t num_samples = src_surface->numSamples;
-   float recip_num_samples = 1.0f / num_samples;
-   for (uint32_t y = 0; y < src_surface->height; y++) {
-      for (uint32_t x = 0; x < src_surface->width; x++) {
-         float r = 0.0f;
-         float g = 0.0f;
-         float b = 0.0f;
-         float a = 0.0f;
-         for (uint32_t sampleNum = 0;  sampleNum < num_samples; sampleNum++) {
-            offset = ComputeSurfaceOffset<false>(x, y, 0, 0, sampleNum, 0, src_surface);
-            src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps;
-            const uint32_t sample = *src;
-            r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples;
-            g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples;
-            b += (float)((sample >>  8) & 0xff) / 255.0f * recip_num_samples;
-            a += (float)((sample      ) & 0xff) / 255.0f * recip_num_samples;
-         }
-         uint32_t result = 0;
-         result  = ((uint8_t)(r * 255.0f) & 0xff) << 24;
-         result |= ((uint8_t)(g * 255.0f) & 0xff) << 16;
-         result |= ((uint8_t)(b * 255.0f) & 0xff) <<  8;
-         result |= ((uint8_t)(a * 255.0f) & 0xff);
-         offset = ComputeSurfaceOffset<false>(x, y, 0, 0, 0, 0, src_surface);
-         dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps;
-         *dst = result;
-      }
-   }
-}
-
-
 static void
 swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
 {
@@ -342,28 +283,14 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
      debug_printf("swr_blit: color resolve : %d -> %d\n",
            info.src.resource->nr_samples, info.dst.resource->nr_samples);

-      /* Because the resolve is being done inline (not pipelined),
-       * resources need to be stored out of hottiles and the pipeline empty.
-       *
-       * Resources are marked unused following fence finish because all
-       * pipeline operations are complete.  Validation of the blit will mark
-       * them are read/write again.
-       */
+      /* Resolve is done as part of the surface store. */
      swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
-      swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED);
-      swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0);
-      swr_resource_unused(info.src.resource);
-      swr_resource_unused(info.dst.resource);

      struct pipe_resource *src_resource = info.src.resource;
      struct pipe_resource *resolve_target =
         swr_resource(src_resource)->resolve_target;

-      /* Inline resolve samples into resolve target resource, then continue
-       * the blit. */
-      swr_do_msaa_resolve(src_resource, resolve_target);
-
-      /* The resolve target becomes the new source for the blit.  */
+      /* The resolve target becomes the new source for the blit. */
      info.src.resource = resolve_target;
   }

--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -891,6 +891,10 @@ swr_create_resolve_resource(struct pipe_screen *_screen,

      /* Attach it to the multisample resource */
      msaa_res->resolve_target = alt;
+
+      /* Hang resolve surface state off the multisample surface state so
+       * StoreTiles knows where to resolve the surface. */
+      msaa_res->swr.pAuxBaseAddress =  (uint8_t *)&swr_resource(alt)->swr;
   }

   return true; /* success */
@@ -1009,14 +1013,10 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
      SwrEndFrame(swr_context(pipe)->swrContext);
   }

-   /* Multisample surfaces need to be resolved before present */
+   /* Multisample resolved into resolve_target at flush with store_resource */
   if (pipe && spr->swr.numSamples > 1) {
      struct pipe_resource *resolve_target = spr->resolve_target;

-      /* Do an inline surface resolve into the resolve target resource
-       * XXX: This works, just not optimal. Work on using a pipelined blit. */
-      swr_do_msaa_resolve(resource, resolve_target);
-
      /* Once resolved, copy into display target */
      SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;

--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -165,7 +165,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
            prsc->width0 == box->width &&
            prsc->height0 == box->height &&
            prsc->depth0 == box->depth &&
-            prsc->array_size == 1) {
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
        }

--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -35,7 +35,8 @@ LOCAL_CFLAGS :=
 LOCAL_SHARED_LIBRARIES := \
 	libdl \
 	libglapi \
-	libexpat
+	libexpat \
+	libz

 ifneq ($(filter freedreno,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DGALLIUM_FREEDRENO
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -315,6 +315,10 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
      ws->family = FAMILY_AI;
      ws->rev_id = AI_VEGA10_P_A0;
      break;
+   case CHIP_RAVEN:
+      ws->family = FAMILY_RV;
+      ws->rev_id = RAVEN_A0;
+      break;
   default:
      fprintf(stderr, "amdgpu: Unknown family.\n");
      goto fail;
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -258,7 +258,12 @@ virgl_bo_transfer_put(struct virgl_winsys *vws,

   memset(&tohostcmd, 0, sizeof(tohostcmd));
   tohostcmd.bo_handle = res->bo_handle;
-   tohostcmd.box = *(struct drm_virtgpu_3d_box *)box;
+   tohostcmd.box.x = box->x;
+   tohostcmd.box.y = box->y;
+   tohostcmd.box.z = box->z;
+   tohostcmd.box.w = box->width;
+   tohostcmd.box.h = box->height;
+   tohostcmd.box.d = box->depth;
   tohostcmd.offset = buf_offset;
   tohostcmd.level = level;
  // tohostcmd.stride = stride;
@@ -282,7 +287,12 @@ virgl_bo_transfer_get(struct virgl_winsys *vws,
   fromhostcmd.offset = buf_offset;
  // fromhostcmd.stride = stride;
  // fromhostcmd.layer_stride = layer_stride;
-   fromhostcmd.box = *(struct drm_virtgpu_3d_box *)box;
+   fromhostcmd.box.x = box->x;
+   fromhostcmd.box.y = box->y;
+   fromhostcmd.box.z = box->z;
+   fromhostcmd.box.w = box->width;
+   fromhostcmd.box.h = box->height;
+   fromhostcmd.box.d = box->depth;
   return drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST, &fromhostcmd);
 }

--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -772,7 +772,7 @@ gbm_dri_bo_get_modifier(struct gbm_bo *_bo)
                               &mod))
      return DRM_FORMAT_MOD_INVALID;

-   ret |= mod;
+   ret |= (uint64_t)(mod & 0xffffffff);

   return ret;
 }
--- a/src/glx/g_glxglvnddispatchfuncs.c
+++ b/src/glx/g_glxglvnddispatchfuncs.c
@@ -4,6 +4,7 @@
 */
 #include <stdlib.h>

+#include "glxclient.h"
 #include "glxglvnd.h"
 #include "glxglvnddispatchfuncs.h"
 #include "g_glxglvnddispatchindices.h"
@@ -50,6 +51,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
    __ATTRIB(GetCurrentDisplayEXT),
    // glXGetCurrentDrawable implemented by libglvnd
    // glXGetCurrentReadDrawable implemented by libglvnd
+    __ATTRIB(GetDriverConfig),
    // glXGetFBConfigAttrib implemented by libglvnd
    __ATTRIB(GetFBConfigAttribSGIX),
    __ATTRIB(GetFBConfigFromVisualSGIX),
@@ -334,6 +336,17 @@ static Display *dispatch_GetCurrentDisplayEXT(void)



+static const char *dispatch_GetDriverConfig(const char *driverName)
+{
+    /*
+     * The options are constant for a given driverName, so we do not need
+     * a context (and apps expect to be able to call this without one).
+     */
+    return glXGetDriverConfig(driverName);
+}
+
+
+
 static int dispatch_GetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config,
                                          int attribute, int *value_return)
 {
@@ -939,6 +952,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
    __ATTRIB(DestroyGLXPbufferSGIX),
    __ATTRIB(GetContextIDEXT),
    __ATTRIB(GetCurrentDisplayEXT),
+    __ATTRIB(GetDriverConfig),
    __ATTRIB(GetFBConfigAttribSGIX),
    __ATTRIB(GetFBConfigFromVisualSGIX),
    __ATTRIB(GetMscRateOML),
--- a/src/glx/g_glxglvnddispatchindices.h
+++ b/src/glx/g_glxglvnddispatchindices.h
@@ -39,6 +39,7 @@ typedef enum __GLXdispatchIndex {
    DI_GetCurrentDisplayEXT,
    // GetCurrentDrawable implemented by libglvnd
    // GetCurrentReadDrawable implemented by libglvnd
+    DI_GetDriverConfig,
    // GetFBConfigAttrib implemented by libglvnd
    DI_GetFBConfigAttribSGIX,
    DI_GetFBConfigFromVisualSGIX,
--- a/src/intel/Android.vulkan.mk
+++ b/src/intel/Android.vulkan.mk
@@ -231,7 +231,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
 	libmesa_intel_compiler \
 	libmesa_anv_entrypoints

-LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_SHARED_LIBRARIES := libdrm libz

 include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -583,16 +583,46 @@ vec4_visitor::split_uniform_registers()
   }
 }

+/* This function returns the register number where we placed the uniform */
+static int
+set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
+                      const int src, const int size, const int channel_size,
+                      int *new_loc, int *new_chan,
+                      int *new_chans_used)
+{
+   int dst;
+   /* Find the lowest place we can slot this uniform in. */
+   for (dst = 0; dst < nr_uniforms; dst++) {
+      if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
+         break;
+   }
+
+   assert(dst < nr_uniforms);
+
+   new_loc[src] = dst;
+   new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
+   new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
+
+   *new_uniform_count = MAX2(*new_uniform_count, dst + 1);
+   return dst;
+}
+
 void
 vec4_visitor::pack_uniform_registers()
 {
   uint8_t chans_used[this->uniforms];
   int new_loc[this->uniforms];
   int new_chan[this->uniforms];
+   bool is_aligned_to_dvec4[this->uniforms];
+   int new_chans_used[this->uniforms];
+   int channel_sizes[this->uniforms];

   memset(chans_used, 0, sizeof(chans_used));
   memset(new_loc, 0, sizeof(new_loc));
   memset(new_chan, 0, sizeof(new_chan));
+   memset(new_chans_used, 0, sizeof(new_chans_used));
+   memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
+   memset(channel_sizes, 0, sizeof(channel_sizes));

   /* Find which uniform vectors are actually used by the program.  We
    * expect unused vector elements when we've moved array access out
@@ -622,7 +652,7 @@ vec4_visitor::pack_uniform_registers()
            continue;

         assert(type_sz(inst->src[i].type) % 4 == 0);
-         unsigned channel_size = type_sz(inst->src[i].type) / 4;
+         int channel_size = type_sz(inst->src[i].type) / 4;

         int reg = inst->src[i].nr;
         for (int c = 0; c < 4; c++) {
@@ -631,10 +661,15 @@ vec4_visitor::pack_uniform_registers()

            unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
            unsigned used = MAX2(chans_used[reg], channel * channel_size);
-            if (used <= 4)
+            if (used <= 4) {
               chans_used[reg] = used;
-            else
+               channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
+            } else {
+               is_aligned_to_dvec4[reg] = true;
+               is_aligned_to_dvec4[reg + 1] = true;
               chans_used[reg + 1] = used - 4;
+               channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
+            }
         }
      }

@@ -659,42 +694,60 @@ vec4_visitor::pack_uniform_registers()

   int new_uniform_count = 0;

+   /* As the uniforms are going to be reordered, take the data from a temporary
+    * copy of the original param[].
+    */
+   gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
+                                            stage_prog_data->nr_params);
+   memcpy(param, stage_prog_data->param,
+          sizeof(gl_constant_value*) * stage_prog_data->nr_params);
+
   /* Now, figure out a packing of the live uniform vectors into our
-    * push constants.
+    * push constants. Start with dvec{3,4} because they are aligned to
+    * dvec4 size (2 vec4).
    */
   for (int src = 0; src < uniforms; src++) {
      int size = chans_used[src];

-      if (size == 0)
+      if (size == 0 || !is_aligned_to_dvec4[src])
         continue;

-      int dst;
-      /* Find the lowest place we can slot this uniform in. */
-      for (dst = 0; dst < src; dst++) {
-         if (chans_used[dst] + size <= 4)
-            break;
+      /* dvec3 are aligned to dvec4 size, apply the alignment of the size
+       * to 4 to avoid moving last component of a dvec3 to the available
+       * location at the end of a previous dvec3. These available locations
+       * could be filled by smaller variables in next loop.
+       */
+      size = ALIGN(size, 4);
+      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
+                                      src, size, channel_sizes[src],
+                                      new_loc, new_chan,
+                                      new_chans_used);
+      /* Move the references to the data */
+      for (int j = 0; j < size; j++) {
+         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+            param[src * 4 + j];
      }
-
-      if (src == dst) {
-         new_loc[src] = dst;
-         new_chan[src] = 0;
-      } else {
-         new_loc[src] = dst;
-         new_chan[src] = chans_used[dst];
-
-         /* Move the references to the data */
-         for (int j = 0; j < size; j++) {
-            stage_prog_data->param[dst * 4 + new_chan[src] + j] =
-               stage_prog_data->param[src * 4 + j];
-         }
-
-         chans_used[dst] += size;
-         chans_used[src] = 0;
-      }
-
-      new_uniform_count = MAX2(new_uniform_count, dst + 1);
   }

+   /* Continue with the rest of data, which is aligned to vec4. */
+   for (int src = 0; src < uniforms; src++) {
+      int size = chans_used[src];
+
+      if (size == 0 || is_aligned_to_dvec4[src])
+         continue;
+
+      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
+                                      src, size, channel_sizes[src],
+                                      new_loc, new_chan,
+                                      new_chans_used);
+      /* Move the references to the data */
+      for (int j = 0; j < size; j++) {
+         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+            param[src * 4 + j];
+      }
+   }
+
+   ralloc_free(param);
   this->uniforms = new_uniform_count;

   /* Now, update the instructions for our repacked uniforms. */
@@ -705,9 +758,9 @@ vec4_visitor::pack_uniform_registers()
         if (inst->src[i].file != UNIFORM)
            continue;

+         int chan = new_chan[src] / channel_sizes[src];
         inst->src[i].nr = new_loc[src];
-         inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
-                                              new_chan[src], new_chan[src]);
+         inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
      }
   }
 }
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -868,10 +868,36 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,

         vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
                           mem_ctx, true /* no_spills */, shader_time_index);
+
+         /* Backup 'nr_params' and 'param' as they can be modified by the
+          * DUAL_OBJECT visitor. If it fails, we will run the fallback
+          * (DUAL_INSTANCED or SINGLE mode) and we need to restore original
+          * values.
+          */
+         const unsigned param_count = prog_data->base.base.nr_params;
+         gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
+                                                  param_count);
+         memcpy(param, prog_data->base.base.param,
+                sizeof(gl_constant_value*) * param_count);
+
         if (v.run()) {
+            /* Success! Backup is not needed */
+            ralloc_free(param);
            return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
                                              shader, &prog_data->base, v.cfg,
                                              final_assembly_size);
+         } else {
+            /* These variables could be modified by the execution of the GS
+             * visitor if it packed the uniforms in the push constant buffer.
+             * As it failed, we need to restore them so we can start again with
+             * DUAL_INSTANCED or SINGLE mode.
+             *
+             * FIXME: Could more variables be modified by this execution?
+             */
+            memcpy(prog_data->base.base.param, param,
+                   sizeof(gl_constant_value*) * param_count);
+            prog_data->base.base.nr_params = param_count;
+            ralloc_free(param);
         }
      }
   }
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -852,7 +852,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       * The swizzle also works in the indirect case as the generator adds
       * the swizzle to the offset for us.
       */
-      unsigned shift = (nir_intrinsic_base(instr) % 16) / 4;
+      const int type_size = type_sz(src.type);
+      unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size;
      assert(shift + instr->num_components <= 4);

      nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
@@ -860,14 +861,20 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
         /* Offsets are in bytes but they should always be multiples of 4 */
         assert(const_offset->u32[0] % 4 == 0);

-         unsigned offset = const_offset->u32[0] + shift * 4;
+         src.swizzle = brw_swizzle_for_size(instr->num_components);
+         dest.writemask = brw_writemask_for_size(instr->num_components);
+         unsigned offset = const_offset->u32[0] + shift * type_size;
         src.offset = ROUND_DOWN_TO(offset, 16);
-         shift = (offset % 16) / 4;
+         shift = (offset % 16) / type_size;
+         assert(shift + instr->num_components <= 4);
         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);

         emit(MOV(dest, src));
      } else {
-         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
+         /* Uniform arrays are vec4 aligned, because of std140 alignment
+          * rules.
+          */
+         assert(shift == 0);

         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -352,30 +352,12 @@ gen7_choose_valign_el(const struct isl_device *dev,
   if (isl_surf_usage_is_stencil(info->usage)) {
      /* The Ivybridge PRM states that the stencil buffer's vertical alignment
       * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
-       * Unit Size]. However, valign=8 is outside the set of valid values of
-       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
-       * (0x0) and VALIGN_4 (0x1).
-       *
-       * The PRM is generally confused about the width, height, and alignment
-       * of the stencil buffer; and this confusion appears elsewhere. For
-       * example, the following PRM text effectively converts the stencil
-       * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
-       * Volume 1, Part 1, Section
-       * 6.18.4.2 Base Address and LOD Calculation]:
-       *
-       *    For separate stencil buffer, the width must be mutiplied by 2 and
-       *    height divided by 2 as follows:
-       *
-       *       w_L = 2*i*ceil(W_L/i)
-       *       h_L = 1/2*j*ceil(H_L/j)
-       *
-       * The root of the confusion is that, in W tiling, each pair of rows is
-       * interleaved into one.
-       *
-       * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
-       * is more polished.
+       * Unit Size]. valign=8 is outside the set of valid values of
+       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, but that's ok because
+       * a stencil buffer will never be used directly for texturing or
+       * rendering on gen7.
       */
-      require_valign4 = true;
+      return 8;
   }

   assert(!require_valign2 || !require_valign4);
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -452,7 +452,7 @@ anv_enumerate_devices(struct anv_instance *instance)

   instance->physicalDeviceCount = 0;

-   max_devices = drmGetDevices2(0, devices, sizeof(devices));
+   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
   if (max_devices < 1)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

@@ -468,6 +468,7 @@ anv_enumerate_devices(struct anv_instance *instance)
            break;
      }
   }
+   drmFreeDevices(devices, max_devices);

   if (result == VK_SUCCESS)
      instance->physicalDeviceCount = 1;
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -179,8 +179,8 @@ static const struct anv_format anv_formats[] = {
   fmt(VK_FORMAT_D24_UNORM_S8_UINT,       ISL_FORMAT_R24_UNORM_X8_TYPELESS),
   fmt(VK_FORMAT_D32_SFLOAT_S8_UINT,      ISL_FORMAT_R32_FLOAT),

-   fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_DXT1_RGB),
-   fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_DXT1_RGB_SRGB),
+   swiz_fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_BC1_UNORM, RGB1),
+   swiz_fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_BC1_UNORM_SRGB, RGB1),
   fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK,    ISL_FORMAT_BC1_UNORM),
   fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK,     ISL_FORMAT_BC1_UNORM_SRGB),
   fmt(VK_FORMAT_BC2_UNORM_BLOCK,         ISL_FORMAT_BC2_UNORM),
--- a/src/mesa/drivers/dri/Android.mk
+++ b/src/mesa/drivers/dri/Android.mk
@@ -51,7 +51,8 @@ MESA_DRI_SHARED_LIBRARIES := \
 	libdl \
 	libexpat \
 	libglapi \
-	liblog
+	liblog \
+	libz

 #-----------------------------------------------
 # Build drivers and libmesa_dri_common
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -94,14 +94,14 @@ brw_isl_format_for_mesa_format(mesa_format mesa_format)
      [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
      [MESA_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB,
      [MESA_FORMAT_A8L8_SRGB] = 0,
-      [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_DXT1_RGB_SRGB,
+      [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,

      [MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
      [MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
-      [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_DXT1_RGB,
+      [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
      [MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
      [MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
      [MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
@@ -541,17 +541,6 @@ translate_tex_format(struct brw_context *brw,
       */
      return ISL_FORMAT_R32G32B32A32_FLOAT;

-   case MESA_FORMAT_SRGB_DXT1:
-      if (brw->gen == 4 && !brw->is_g4x) {
-         /* Work around missing SRGB DXT1 support on original gen4 by just
-          * skipping SRGB decode.  It's not worth not supporting sRGB in
-          * general to prevent this.
-          */
-         WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n");
-         mesa_format = MESA_FORMAT_RGB_DXT1;
-      }
-      return brw_isl_format_for_mesa_format(mesa_format);
-
   case MESA_FORMAT_RGBA_ASTC_4x4:
   case MESA_FORMAT_RGBA_ASTC_5x4:
   case MESA_FORMAT_RGBA_ASTC_5x5:
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -389,7 +389,9 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
   case GL_RED:
   case GL_RG:
   case GL_RGB:
-      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
+      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
+          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
+          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }
--- a/src/mesa/state_tracker/st_cb_eglimage.c
+++ b/src/mesa/state_tracker/st_cb_eglimage.c
@@ -39,6 +39,35 @@
 #include "st_sampler_view.h"
 #include "util/u_surface.h"

+static bool
+is_format_supported(struct pipe_screen *screen, enum pipe_format format,
+                    unsigned nr_samples, unsigned usage)
+{
+   bool supported = screen->is_format_supported(screen, format, PIPE_TEXTURE_2D,
+                                                nr_samples, usage);
+
+   /* For sampling, some formats can be emulated. It doesn't matter that
+    * the surface will have a format that the driver can't cope with because
+    * we'll give it sampler view formats that it can deal with and generate
+    * a shader variant that converts.
+    */
+   if ((usage == PIPE_BIND_SAMPLER_VIEW) && !supported) {
+      if (format == PIPE_FORMAT_IYUV) {
+         supported = screen->is_format_supported(screen, PIPE_FORMAT_R8_UNORM,
+                                                 PIPE_TEXTURE_2D, nr_samples,
+                                                 usage);
+      } else if (format == PIPE_FORMAT_NV12) {
+         supported = screen->is_format_supported(screen, PIPE_FORMAT_R8_UNORM,
+                                                 PIPE_TEXTURE_2D, nr_samples,
+                                                 usage) &&
+                     screen->is_format_supported(screen, PIPE_FORMAT_R8G8_UNORM,
+                                                 PIPE_TEXTURE_2D, nr_samples,
+                                                 usage);
+      }
+   }
+
+   return supported;
+}

 /**
 * Return the surface of an EGLImage.
@@ -65,8 +94,7 @@ st_egl_image_get_surface(struct gl_context *ctx, GLeglImageOES image_handle,
      return NULL;
   }

-   if (!screen->is_format_supported(screen, stimg.format, PIPE_TEXTURE_2D,
-                                    stimg.texture->nr_samples, usage)) {
+   if (!is_format_supported(screen, stimg.format, stimg.texture->nr_samples, usage)) {
      /* unable to specify a texture object using the specified EGL image */
      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(format not supported)", error);
      return NULL;
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -844,8 +844,6 @@ st_manager_flush_frontbuffer(struct st_context *st)
   struct st_framebuffer *stfb = st_ws_framebuffer(st->ctx->DrawBuffer);
   struct st_renderbuffer *strb = NULL;

-   assert(st->ctx->DrawBuffer != _mesa_get_incomplete_framebuffer());
-
   if (stfb)
      strb = st_renderbuffer(stfb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
   if (!strb)
--- a/src/util/Android.mk
+++ b/src/util/Android.mk
@@ -34,6 +34,7 @@ LOCAL_SRC_FILES := \
 	$(MESA_UTIL_FILES)

 LOCAL_C_INCLUDES := \
+	external/zlib \
 	$(MESA_TOP)/src/mesa \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/gallium/include \
@@ -1 +1 @@
 .1.0
 .1.1