docs: add release notes for 17.1.2

Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
Update version to 17.1.2
2017-06-05 20:27:24 +00:00 · 2017-06-05 20:15:30 +00:00 · 2017-06-03 20:37:13 +02:00 · 2017-06-03 20:37:13 +02:00 · 2017-06-02 23:04:01 +02:00 · 2017-06-02 23:04:01 +02:00
119 changed files with 2136 additions and 806 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -179,12 +179,9 @@ matrix:
        - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
        - LLVM_VERSION=3.9
        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        # XXX: we want to test the WSI, but those are enabled via the EGL toggles
-        # XXX: Platform X11 dependencies are checked when --enable-glx is set
-        - DRI_LOADERS="--enable-glx --disable-gbm --enable-egl --with-platforms=x11,wayland"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland"
        - DRI_DRIVERS=""
-        # XXX: enable DRI for EGL above
-        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS=""
        - VULKAN_DRIVERS="intel,radeon"
      addons:
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -116,7 +116,3 @@ endif

 # Quiet down the build system and remove any .h files from the sources
 LOCAL_SRC_FILES := $(patsubst %.h, , $(LOCAL_SRC_FILES))
-
-ifneq ($(LOCAL_IS_HOST_MODULE),true)
-LOCAL_SHARED_LIBRARIES += libz
-endif
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-17.1.0-rc4
+17.1.2
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,4 @@
+# This commit depends on 9fd9a7d0ba3 and 678d568c7b2, neither of which is in branch.
+b84b631c6381d9b36bca5d0e7cc67dd23af188c1 radeonsi: load patch_id for TES-as-ES when exporting for PS
+# This commit addressed an earlier commit 126d5ad which did not land in branch.
+9da104593386f6e8ddec8f0d9d288aceb8908fe1 radv: fix regression in descriptor set freeing.
--- a/bin/get-extra-pick-list.sh
+++ b/bin/get-extra-pick-list.sh
@@ -30,7 +30,15 @@ do
 		if grep -q ^$candidate already_picked ; then
 			continue
 		fi
-		echo Commit $candidate references $sha
+		# Or if it isn't in the ignore list.
+		if [ -f bin/.cherry-ignore ] ; then
+			if grep -q ^$candidate bin/.cherry-ignore ; then
+				continue
+			fi
+		fi
+		printf "Commit \"%s\" references %s\n" \
+		       "`git log -n1 --pretty=oneline $candidate`" \
+		       "$sha"
 	done
 done

--- a/bin/get-fixes-pick-list.sh
+++ b/bin/get-fixes-pick-list.sh
@@ -24,35 +24,52 @@ git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |
 git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
 while read sha
 do
-	# For each one try to extract the tag
-	fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
-	if [ "x$fixes_count" != x1 ] ; then
-		echo WARNING: Commit $sha has more than one Fixes tag
+	# Check to see whether the patch is on the ignore list ...
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q ^$sha bin/.cherry-ignore ; then
+			continue
+		fi
 	fi
-	fixes=`git show $sha | grep -i "fixes:" | head -n 1`
-	# The following sed/cut combination is borrowed from GregKH
-	id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`

-	# Bail out if we cannot find suitable id.
-	# Any specific validation the $id is valid and not some junk, is
-	# implied with the follow up code
-	if [ "x$id" = x ] ; then
+	# Skip if it has been already cherry-picked.
+	if grep -q ^$sha already_picked ; then
 		continue
 	fi

-	# Check if the offending commit is in branch.
+	# For each one try to extract the tag
+	fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
+	warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
+	while [ $fixes_count -gt 0 ] ; do
+		fixes=`git show $sha | grep -i "fixes:" | tail -n $fixes_count`
+		fixes_count=$(($fixes_count-1))
+		# The following sed/cut combination is borrowed from GregKH
+		id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`

-	# Be that cherry-picked ...
-	# ... or landed before the branchpoint.
-	if grep -q ^$id already_picked ||
-	   grep -q ^$id already_landed ; then
-
-		# Finally nominate the fix if it hasn't landed yet.
-		if grep -q ^$sha already_picked ; then
+		# Bail out if we cannot find suitable id.
+		# Any specific validation the $id is valid and not some junk, is
+		# implied with the follow up code
+		if [ "x$id" = x ] ; then
 			continue
 		fi

-		echo Commit $sha fixes $id
+		# Check if the offending commit is in branch.
+
+		# Be that cherry-picked ...
+		# ... or landed before the branchpoint.
+		if grep -q ^$id already_picked ||
+		   grep -q ^$id already_landed ; then
+
+			printf "Commit \"%s\" fixes %s\n" \
+			       "`git log -n1 --pretty=oneline $sha`" \
+			       "$id"
+			warn=$(($warn-1))
+		fi
+
+	done
+
+	if [ $warn -gt 0 ] ; then
+		printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
+		       "`git log -n1 --pretty=oneline $sha`"
 	fi

 done
--- a/configure.ac
+++ b/configure.ac
@@ -269,7 +269,7 @@ DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
 AC_SUBST([DEFINES])
 android=no
 case "$host_os" in
-*-android)
+*-android*)
    android=yes
    ;;
 linux*|*-gnu*|gnu*|cygwin*)
@@ -1079,14 +1079,9 @@ fi

 if test "x$LIBUNWIND" = "xyes"; then
    PKG_CHECK_MODULES(LIBUNWIND, libunwind)
-    if test "x$HAVE_LIBUNWIND" != "xyes"; then
-        AC_MSG_ERROR([libunwind requested but not installed.])
-    fi
    AC_DEFINE(HAVE_LIBUNWIND, 1, [Have libunwind support])
 fi

-AM_CONDITIONAL(HAVE_LIBUNWIND, [test "x$LIBUNWIND" = xyes])
-

 dnl Options for APIs
 AC_ARG_ENABLE([opengl],
@@ -1541,15 +1536,10 @@ xdri)

            PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED])
            GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV libdrm >= $LIBDRM_REQUIRED"
+
            if test x"$enable_dri" = xyes; then
               dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED"
            fi
-
-            if test x"$enable_dri3" = xyes; then
-               PKG_CHECK_EXISTS([xcb >= $XCB_REQUIRED], [], AC_MSG_ERROR([DRI3 requires xcb >= $XCB_REQUIRED]))
-               dri3_modules="xcb xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
-               PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
-            fi
        fi
        if test x"$dri_platform" = xapple ; then
            DEFINES="$DEFINES -DGLX_USE_APPLEGL"
@@ -1638,6 +1628,111 @@ if test "x$enable_glx_read_only_text" = xyes; then
    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
 fi

+dnl
+dnl DEPRECATED: EGL Platforms configuration
+dnl
+AC_ARG_WITH([egl-platforms],
+    [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
+        [DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
+    [with_egl_platforms="$withval"],
+    [with_egl_platforms=auto])
+
+if test "x$with_egl_platforms" = xauto; then
+    with_egl_platforms="x11,surfaceless"
+    if test "x$enable_gbm" = xyes; then
+        with_egl_platforms="$with_egl_platforms,drm"
+    fi
+else
+    AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
+fi
+
+dnl
+dnl Platforms configuration
+dnl
+AC_ARG_WITH([platforms],
+    [AS_HELP_STRING([--with-platforms@<:@=DIRS...@:>@],
+        [comma delimited native platforms libEGL/Vulkan/other supports, e.g.
+        "x11,drm,wayland,surfaceless..." @<:@default=auto@:>@])],
+    [with_platforms="$withval"],
+    [with_platforms=auto])
+
+# Reuse the autodetection rather than duplicating it.
+if test "x$with_platforms" = xauto; then
+    with_platforms=$with_egl_platforms
+fi
+
+PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
+        WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
+        WAYLAND_SCANNER='')
+if test "x$WAYLAND_SCANNER" = x; then
+    AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
+fi
+
+# Do per platform setups and checks
+platforms=`IFS=', '; echo $with_platforms`
+for plat in $platforms; do
+	case "$plat" in
+	wayland)
+
+		PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
+
+		if test "x$WAYLAND_SCANNER" = "x:"; then
+			AC_MSG_ERROR([wayland-scanner is needed to compile the wayland platform])
+		fi
+		DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM"
+		;;
+
+	x11)
+		PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED xcb-xfixes])
+		DEFINES="$DEFINES -DHAVE_X11_PLATFORM"
+		;;
+
+	drm)
+		test "x$enable_gbm" = "xno" &&
+			AC_MSG_ERROR([EGL platform drm needs gbm])
+		DEFINES="$DEFINES -DHAVE_DRM_PLATFORM"
+		;;
+
+	surfaceless)
+		DEFINES="$DEFINES -DHAVE_SURFACELESS_PLATFORM"
+		;;
+
+	android)
+		PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
+		DEFINES="$DEFINES -DHAVE_ANDROID_PLATFORM"
+		;;
+
+	*)
+		AC_MSG_ERROR([platform '$plat' does not exist])
+		;;
+	esac
+
+	case "$plat" in
+	wayland|drm|surfaceless)
+		require_libdrm "Platform $plat"
+		;;
+	esac
+done
+
+if test "x$enable_glx" != xno; then
+    if ! echo "$platforms" | grep -q 'x11'; then
+        AC_MSG_ERROR([Building without the x11 platform as GLX is enabled, is not supported])
+    fi
+fi
+
+if test x"$enable_dri3" = xyes; then
+    DEFINES="$DEFINES -DHAVE_DRI3"
+
+    dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 xcb-xfixes xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
+    PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
+fi
+
+AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
+AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland')
+AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
+AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 'surfaceless')
+AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
+
 dnl
 dnl More DRI setup
 dnl
@@ -1680,10 +1775,6 @@ if test "x$enable_dri" = xyes; then
    # Platform specific settings and drivers to build
    case "$host_os" in
    linux*)
-        if test "x$enable_dri3" = xyes; then
-            DEFINES="$DEFINES -DHAVE_DRI3"
-        fi
-
        case "$host_cpu" in
        powerpc* | sparc*)
            # Build only the drivers for cards that exist on PowerPC/sparc
@@ -1839,6 +1930,14 @@ AC_ARG_WITH([vulkan-icddir],
    [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
 AC_SUBST([VULKAN_ICD_INSTALL_DIR])

+require_x11_dri3() {
+    if echo "$platforms" | grep -q 'x11'; then
+        if test "x$enable_dri3" != xyes; then
+            AC_MSG_ERROR([$1 Vulkan driver requires DRI3 when built with X11])
+        fi
+    fi
+}
+
 if test -n "$with_vulkan_drivers"; then
    if test "x$ac_cv_func_dl_iterate_phdr" = xno; then
        AC_MSG_ERROR([Vulkan drivers require the dl_iterate_phdr function])
@@ -1850,12 +1949,14 @@ if test -n "$with_vulkan_drivers"; then
        xintel)
            require_libdrm "ANV"
            PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
+            require_x11_dri3 "ANV"
            HAVE_INTEL_VULKAN=yes
            ;;
        xradeon)
            require_libdrm "radv"
            PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
            radeon_llvm_check $LLVM_REQUIRED_RADV "radv"
+            require_x11_dri3 "radv"
            HAVE_RADEON_VULKAN=yes
            ;;
        *)
@@ -1961,23 +2062,47 @@ if test "x$enable_xa" = xyes; then
 fi
 AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes)

+if echo $platforms | grep -q "x11"; then
+    have_xvmc_platform=yes
+else
+    have_xvmc_platform=no
+fi
+
+if echo $platforms | grep -q "x11"; then
+    have_vdpau_platform=yes
+else
+    have_vdpau_platform=no
+fi
+
+if echo $platforms | grep -q "x11\|drm"; then
+    have_omx_platform=yes
+else
+    have_omx_platform=no
+fi
+
+if echo $platforms | grep -q "x11\|drm\|wayland"; then
+    have_va_platform=yes
+else
+    have_va_platform=no
+fi
+
 dnl
 dnl Gallium G3DVL configuration
 dnl
 if test -n "$with_gallium_drivers" -a "x$with_gallium_drivers" != xswrast; then
-    if test "x$enable_xvmc" = xauto; then
+    if test "x$enable_xvmc" = xauto -a "x$have_xvmc_platform" = xyes; then
 	PKG_CHECK_EXISTS([xvmc >= $XVMC_REQUIRED], [enable_xvmc=yes], [enable_xvmc=no])
    fi

-    if test "x$enable_vdpau" = xauto; then
+    if test "x$enable_vdpau" = xauto -a "x$have_vdpau_platform" = xyes; then
 	PKG_CHECK_EXISTS([vdpau >= $VDPAU_REQUIRED], [enable_vdpau=yes], [enable_vdpau=no])
    fi

-    if test "x$enable_omx" = xauto; then
+    if test "x$enable_omx" = xauto -a "x$have_omx_platform" = xyes; then
 	PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx=yes], [enable_omx=no])
    fi

-    if test "x$enable_va" = xauto; then
+    if test "x$enable_va" = xauto -a "x$have_va_platform" = xyes; then
        PKG_CHECK_EXISTS([libva >= $LIBVA_REQUIRED], [enable_va=yes], [enable_va=no])
    fi
 fi
@@ -1995,23 +2120,24 @@ if test "x$enable_xvmc" = xyes -o \
        "x$enable_vdpau" = xyes -o \
        "x$enable_omx" = xyes -o \
        "x$enable_va" = xyes; then
-    if test x"$enable_dri3" = xyes; then
-        PKG_CHECK_MODULES([VL], [xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED
-                                 xcb-xfixes x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
-    else
-        PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
-    fi
+    PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
    need_gallium_vl_winsys=yes
 fi
 AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)

 if test "x$enable_xvmc" = xyes; then
+    if test "x$have_xvmc_platform" != xyes; then
+        AC_MSG_ERROR([XVMC requires the x11 platforms])
+    fi
    PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED])
    gallium_st="$gallium_st xvmc"
 fi
 AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes)

 if test "x$enable_vdpau" = xyes; then
+    if test "x$have_vdpau_platform" != xyes; then
+        AC_MSG_ERROR([VDPAU requires the x11 platforms])
+    fi
    PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED])
    gallium_st="$gallium_st vdpau"
    DEFINES="$DEFINES -DHAVE_ST_VDPAU"
@@ -2019,12 +2145,18 @@ fi
 AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)

 if test "x$enable_omx" = xyes; then
+    if test "x$have_omx_platform" != xyes; then
+        AC_MSG_ERROR([OMX requires at least one of the x11 or drm platforms])
+    fi
    PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
    gallium_st="$gallium_st omx"
 fi
 AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)

 if test "x$enable_va" = xyes; then
+    if test "x$have_va_platform" != xyes; then
+        AC_MSG_ERROR([VA requires at least one of the x11 drm or wayland platforms])
+    fi
    PKG_CHECK_MODULES([VA], [libva >= $LIBVA_REQUIRED])
    gallium_st="$gallium_st va"
 fi
@@ -2141,113 +2273,21 @@ dnl Gallium configuration
 dnl
 AM_CONDITIONAL(HAVE_GALLIUM, test -n "$with_gallium_drivers")

-dnl
-dnl DEPRECATED: EGL Platforms configuration
-dnl
-AC_ARG_WITH([egl-platforms],
-    [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
-        [DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
-    [with_egl_platforms="$withval"],
-    [with_egl_platforms=auto])
-
-if test "x$with_egl_platforms" = xauto; then
-    if test "x$enable_egl" = xyes; then
-        if test "x$enable_gbm" = xyes; then
-           with_egl_platforms="x11,drm"
-        else
-           with_egl_platforms="x11"
-        fi
-    else
-        with_egl_platforms=""
-    fi
-else
-    AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
-fi
-
-dnl
-dnl Platforms configuration
-dnl
-AC_ARG_WITH([platforms],
-    [AS_HELP_STRING([--with-platforms@<:@=DIRS...@:>@],
-        [comma delimited native platforms libEGL/Vulkan/other supports, e.g.
-        "x11,drm,wayland,surfaceless..." @<:@default=auto@:>@])],
-    [with_platforms="$withval"],
-    [with_platforms=auto])
-
-# For the time being, we still reuse the EGL named variables/defines.
-if test "x$with_platforms" != xauto; then
-    with_egl_platforms=$with_platforms
-fi
-
-PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
-        WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
-        WAYLAND_SCANNER='')
-if test "x$WAYLAND_SCANNER" = x; then
-    AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
-fi
-
-# Do per-EGL platform setups and checks
-egl_platforms=`IFS=', '; echo $with_egl_platforms`
-for plat in $egl_platforms; do
-	case "$plat" in
-	wayland)
-
-		PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
-
-		if test "x$WAYLAND_SCANNER" = "x:"; then
-			AC_MSG_ERROR([wayland-scanner is needed to compile the wayland egl platform])
-		fi
-		;;
-
-	x11)
-		PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED xcb-xfixes])
-		;;
-
-	drm)
-		test "x$enable_gbm" = "xno" &&
-			AC_MSG_ERROR([EGL platform drm needs gbm])
-		;;
-
-	surfaceless)
-		;;
-
-	android)
-		PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
-		;;
-
-	*)
-		AC_MSG_ERROR([EGL platform '$plat' does not exist])
-		;;
-	esac
-
-	case "$plat" in
-	wayland|drm|surfaceless)
-		require_libdrm "Platform $plat"
-		;;
-	esac
-done
-
 # libEGL wants to default to the first platform specified in
 # ./configure.  parse that here.
-if test "x$egl_platforms" != "x"; then
-    FIRST_PLATFORM_CAPS=`echo $egl_platforms | sed 's| .*||' | tr '[[a-z]]' '[[A-Z]]'`
+if test "x$platforms" != "x"; then
+    FIRST_PLATFORM_CAPS=`echo $platforms | sed 's| .*||' | tr '[[a-z]]' '[[A-Z]]'`
    EGL_NATIVE_PLATFORM="_EGL_PLATFORM_$FIRST_PLATFORM_CAPS"
 else
    EGL_NATIVE_PLATFORM="_EGL_INVALID_PLATFORM"
 fi

-AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$egl_platforms" | grep -q 'x11')
-AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland')
-AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm')
-AM_CONDITIONAL(HAVE_EGL_PLATFORM_SURFACELESS, echo "$egl_platforms" | grep -q 'surfaceless')
-AM_CONDITIONAL(HAVE_EGL_PLATFORM_ANDROID, echo "$egl_platforms" | grep -q 'android')
-
 AC_SUBST([EGL_NATIVE_PLATFORM])
 AC_SUBST([EGL_CFLAGS])

 # If we don't have the X11 platform, set this define so we don't try to include
 # the X11 headers.
-if ! echo "$egl_platforms" | grep -q 'x11'; then
+if ! echo "$platforms" | grep -q 'x11'; then
    DEFINES="$DEFINES -DMESA_EGL_NO_X11_HEADERS"
    GL_PC_CFLAGS="$GL_PC_CFLAGS -DMESA_EGL_NO_X11_HEADERS"
 fi
@@ -2317,7 +2357,7 @@ dnl DRM is needed by X, Wayland, and offscreen rendering.
 dnl Surfaceless is an alternative for the last one.
 dnl
 require_basic_egl() {
-    case "$with_egl_platforms" in
+    case "$with_platforms" in
        *drm*|*surfaceless*)
            ;;
        *)
@@ -2477,6 +2517,10 @@ if test -n "$with_gallium_drivers"; then
    done
 fi

+# XXX: Keep in sync with LLVM_REQUIRED_SWR
+AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x3.9.0 -a \
+                                              "x$LLVM_VERSION" != x3.9.1)
+
 if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
    llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
    llvm_add_default_components "gallium"
@@ -2872,7 +2916,7 @@ else
    echo "        GBM:             no"
 fi

-    echo "        EGL/Vulkan/VL platforms:   $egl_platforms"
+    echo "        EGL/Vulkan/VL platforms:   $platforms"

 # Vulkan
 echo ""
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -77,15 +77,13 @@ drivers will be installed to <code>${libdir}/egl</code>.</p>

 </dd>

-<dt><code>--with-egl-platforms</code></dt>
+<dt><code>--with-platforms</code></dt>
 <dd>

 <p>List the platforms (window systems) to support.  Its argument is a comma
-separated string such as <code>--with-egl-platforms=x11,drm</code>.  It decides
+separated string such as <code>--with-platforms=x11,drm</code>.  It decides
 the platforms a driver may support.  The first listed platform is also used by
-the main library to decide the native platform: this defines EGL native
-types such as <code>EGLNativeDisplayType</code> or
-<code>EGLNativeWindowType</code>.</p>
+the main library to decide the native platform.</p>

 <p>The available platforms are <code>x11</code>, <code>drm</code>,
 <code>wayland</code>, <code>surfaceless</code>, <code>android</code>,
@@ -167,9 +165,9 @@ binaries.</p>
 <dd>

 <p>This variable specifies the native platform.  The valid values are the same
-as those for <code>--with-egl-platforms</code>.  When the variable is not set,
+as those for <code>--with-platforms</code>.  When the variable is not set,
 the main library uses the first platform listed in
-<code>--with-egl-platforms</code> as the native platform.</p>
+<code>--with-platforms</code> as the native platform.</p>

 <p>Extensions like <code>EGL_MESA_drm_display</code> define new functions to
 create displays for non-native platforms.  These extensions are usually used by
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -402,7 +402,7 @@ Here is one solution that I've been using.
 		--enable-glx-tls \
 		--enable-gbm \
 		--enable-egl \
-		--with-egl-platforms=x11,drm,wayland
+		--with-platforms=x11,drm,wayland,surfaceless
 	make -j2 &amp;&amp; DESTDIR=`pwd`/test make -j6 install
 	__glxinfo_cmd='glxinfo 2>&amp;1 | egrep -o "Mesa.*|Gallium.*|.*dri\.so"'
 	__glxgears_cmd='glxgears 2>&amp;1 | grep -v "configuration file"'
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 17.1.0 Release Notes / TBD</h1>
+<h1>Mesa 17.1.0 Release Notes / May 10, 2017</h1>

 <p>
 Mesa 17.1.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+c388069581a72853161657ac365f2c083afabd7cffd53f80513dacfa1cfa58a8  mesa-17.1.0.tar.gz
+cf234a6ed4764673886b6661553b54675776ef0898f774716173cec890ac3b17  mesa-17.1.0.tar.xz
 </pre>


@@ -63,6 +64,147 @@ Note: some of the new features are only available with certain drivers.
 <h2>Bug fixes</h2>

 <ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68504">Bug 68504</a> - 9.2-rc1 workaround for clover build failure on ppc/altivec: cannot convert 'bool' to '__vector(4) __bool int' in return</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84325">Bug 84325</a> - X.Org segfaults when starting DE on an Intel+Radeon laptop, caused by libpciaccess cleanup, patch attached</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93089">Bug 93089</a> - mesa fails to check for gcc atomic primitives before using them</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95460">Bug 95460</a> - Please add more drivers (freedreno, virgl) to features.txt status document</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96743">Bug 96743</a> - [BYT, HSW, SKL, BXT, KBL] GPU hangs with GfxBench 4.0 CarChase</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97102">Bug 97102</a> - [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97338">Bug 97338</a> - Black squares in the Spec Ops: The Line chapter select screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97967">Bug 97967</a> - glsl/tests/cache-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97988">Bug 97988</a> - [radeonsi] playing back videos with VDPAU exhibits deinterlacing/anti-aliasing issues not visible with VA-API</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with &quot;Fatal error: Cannot set display mode.&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98502">Bug 98502</a> - Delay when starting firefox, thunderbird or chromium and dmesg spam</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98869">Bug 98869</a> - Electronic Super Joy graphic artefacts (regression,bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99010">Bug 99010</a> - --disable-gallium-llvm no longer recognized</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99246">Bug 99246</a> - [d3dadapter+radeonsi &amp; bisect] EVE-Online : hang on wormhole sight</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99265">Bug 99265</a> - i965: Piglit egl_khr_gl_renderbuffer_image-clear-shared-image fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99339">Bug 99339</a> - Blender line rendering broken after removing XY clipping of lines</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99401">Bug 99401</a> - [g33] regression: piglit.spec.!opengl 1_0.gl-1_0-beginend-coverage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99456">Bug 99456</a> - Firefox crashing when opening about:support with WebGL2 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99465">Bug 99465</a> - vtn_vector_construct writing out of bounds when given multiple non-zero length sources</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99484">Bug 99484</a> - Crusader Kings 2 - Loading bars, siege bars, morale bars, etc. do not render correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99542">Bug 99542</a> - vdpau  logging errors since gallium/radeon: adjust the rule for using the LINEAR_ALIGNED layout</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: ‘const struct API_STATE’ has no member named ‘linkageCount’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99660">Bug 99660</a> - Not all of the int64 conversion opcodes got implemented</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99677">Bug 99677</a> - heap-use-after-free in glsl</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99701">Bug 99701</a> - loader.c:353:8: error: implicit declaration of function 'geteuid' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99715">Bug 99715</a> - Don't print: &quot;Note: Buggy applications may crash, if they do please report to vendor&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99789">Bug 99789</a> - Memory leak on failure to create an ir_constant in calculate_iterations in loop_controls.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99817">Bug 99817</a> - [softpipe] piglit glsl-fs-tan-1 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99842">Bug 99842</a> - GL_ARB_transform_feedback2 on i965 gen6</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99850">Bug 99850</a> - Tessellation bug on Carrizo</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99918">Bug 99918</a> - disk_cache.h:57:20: error: no member named 'st_mtim' in 'struct stat'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99953">Bug 99953</a> - device9.c:122:49: error: ‘PIPE_CAP_USER_INDEX_BUFFERS’ undeclared (first use in this function)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99955">Bug 99955</a> - [r600g] GPU load always displayed at 100% with GALLIUM_HUD=GPU-load</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100026">Bug 100026</a> - piglit.spec.arb_shader_subroutine.compiler.direct-call_vert regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100049">Bug 100049</a> - &quot;ralloc: Make sure ralloc() allocations match malloc()'s alignment.&quot; causes seg fault in 32bit build</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100060">Bug 100060</a> - wsi/wsi_common_wayland.c:25:41: fatal error: wayland-drm-client-protocol.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100061">Bug 100061</a> - LODQ instruction generated with invalid dst mask</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100068">Bug 100068</a> - LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.buffer.load.format</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100088">Bug 100088</a> - piglit.spec.arb_get_texture_sub_image.arb_get_texture_sub_image regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100091">Bug 100091</a> - Failure to create folder for on-disk shader cache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100133">Bug 100133</a> - swr_context.cpp:336:44: error: invalid conversion from ‘uint {aka unsigned int}’ to ‘pipe_render_cond_flag’ [-fpermissive]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100154">Bug 100154</a> - test_eu_compact regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100180">Bug 100180</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100182">Bug 100182</a> - Flickering in The Talos Principle on Sky Lake GT4.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100201">Bug 100201</a> - Windows scons build with MSVC toolchain and LLVM 4.0 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100223">Bug 100223</a> - marshal_generated.c:38:10: fatal error: 'X11/Xlib-xcb.h' file not found</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100236">Bug 100236</a> - Undefined symbols for architecture x86_64: &quot;typeinfo for llvm::RTDyldMemoryManager&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100259">Bug 100259</a> - [EGL] [GBM] undefined reference to `gbm_bo_create_with_modifiers'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100288">Bug 100288</a> - clover unable to run OpenCL kernels since 03127bb radeonsi: compile all TGSI compute shaders asynchronously</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100303">Bug 100303</a> - Adding a single, meaningless if-else to a shader source leads to different image</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100391">Bug 100391</a> - SachaWillems deferredmultisampling asserts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100452">Bug 100452</a> - push_constants host memory leak when resetting command buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100531">Bug 100531</a> - [regression] Broken graphics in several games</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100562">Bug 100562</a> - u_debug_stack.c:59: undefined reference to `_Ux86_64_getcontext'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100569">Bug 100569</a> - core/resource.cpp:36:33: error: non-constant-expression cannot be narrowed from type 'int' to 'int16_t' (aka 'short') in initializer list [-Wc++11-narrowing]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100574">Bug 100574</a> - anv_device.c:189: undefined reference to `anv_gem_supports_48b_addresses'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100582">Bug 100582</a> - [GEN8+] piglit.spec.arb_stencil_texturing.glblitframebuffer corrupts state.gl_texture* assertions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100600">Bug 100600</a> - anv_device.c:1337: undefined reference to `anv_gem_busy'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100663">Bug 100663</a> - commit 61e47d92c5196 breaks RS780</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
+
 </ul>

 <h2>Changes</h2>
--- a/docs/relnotes/17.1.1.html
+++ b/docs/relnotes/17.1.1.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.1.1 Release Notes / May 25, 2017</h1>
+
+<p>
+Mesa 17.1.1 is a bug fix release which fixes bugs found since the 17.1.0 release.
+</p>
+<p>
+Mesa 17.1.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+652315af87f2bb015ce99ee3b90d9d115d53cbf9e052493bd13d521a753b1930  mesa-17.1.1.tar.gz
+aed503f94c0c1630a162a3e276f4ee12a86764cee4cb92338ea2dea99a04e7ef  mesa-17.1.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new vega10 pci ids</li>
+</ul>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>bin/get-fixes-pick-list.sh: don't warn if more than one, go over them</li>
+  <li>bin/get-fixes-pick-list.sh: bring back the warning</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>swr: move msaa resolve to generalized StoreTile</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>Android: correct libz dependency</li>
+</ul>
+
+<p>Daniel Stone (1):</p>
+<ul>
+  <li>gbm/dri: Fix sign-extension in modifier query</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.1.0</li>
+  <li>radeon: automake: remove unneeded elf Cflags/Libs</li>
+  <li>configure: remove unneeded bits around libunwind handling</li>
+  <li>egl: add g_egldispatchstubs.h to the release tarball</li>
+  <li>automake: add SWR LLVM gen_builder.hpp workaround</li>
+  <li>Update version to 17.1.1</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>renderonly: Initialize fields of struct winsys_handle.</li>
+  <li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
+</ul>
+
+<p>Grazvydas Ignotas (2):</p>
+<ul>
+  <li>anv: fix possible stack corruption</li>
+  <li>anv: don't leak DRM devices</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0/ir: SHLADD's middle source must be an immediate</li>
+</ul>
+
+<p>Johnson Lin (1):</p>
+<ul>
+  <li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>bin/get-{extra,fixes}-pick-list.sh: add support for ignore list</li>
+  <li>bin/get-{extra,fixes}-pick-list.sh: improve output</li>
+</ul>
+
+<p>Lucas Stach (2):</p>
+<ul>
+  <li>etnaviv: stop oversizing buffer resources</li>
+  <li>etnaviv: allow R/B swapped surfaces to be cleared</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>amd/addrlib: import Raven support</li>
+  <li>radeonsi/gfx9: add support for Raven</li>
+</ul>
+
+<p>Nanley Chery (2):</p>
+<ul>
+  <li>anv/formats: Update the three-channel BC1 mappings</li>
+  <li>i965/formats: Update the three-channel DXT1 mappings</li>
+</ul>
+
+<p>Nicolai Hähnle (5):</p>
+<ul>
+  <li>radeonsi: mark fast-cleared textures as compressed when dirtying</li>
+  <li>radeonsi: fix primitive ID in fragment shader when using tessellation</li>
+  <li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
+  <li>radeonsi: fix gl_PrimitiveIDIn in geometry shader when using tessellation</li>
+  <li>st/mesa: remove an incorrect assertion</li>
+</ul>
+
+<p>Pohjolainen, Topi (1):</p>
+<ul>
+  <li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>mesa/st: fix yuv EGLImage's</li>
+  <li>freedreno: fix crash when flush() but no rendering</li>
+</ul>
+
+<p>Rob Herring (1):</p>
+<ul>
+  <li>virgl: fix virgl_bo_transfer_{put, get} box struct copy</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (3):</p>
+<ul>
+  <li>i965/vec4/gs: restore the uniform values which was overwritten by failed vec4_gs_visitor execution</li>
+  <li>i965/vec4: fix swizzle and writemask when loading an uniform with constant offset</li>
+  <li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.1.2.html
+++ b/docs/relnotes/17.1.2.html
@@ -0,0 +1,186 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.1.2 Release Notes / June 5, 2017</h1>
+
+<p>
+Mesa 17.1.2 is a bug fix release which fixes bugs found since the 17.1.1 release.
+</p>
+<p>
+Mesa 17.1.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100877">Bug 100877</a> - vulkan/tests/block_pool_no_free regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101110">Bug 101110</a> - Build failure in GNOME Continuous</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bartosz Tomczyk (1):</p>
+<ul>
+  <li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Reserve space for descriptor and push constant user SGPR setting.</li>
+</ul>
+
+<p>Daniel Stone (7):</p>
+<ul>
+  <li>vulkan: Fix Wayland uninitialised registry</li>
+  <li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
+  <li>vulkan/wsi/wayland: Use per-display event queue</li>
+  <li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
+  <li>egl/wayland: Don't open-code roundtrip</li>
+  <li>egl/wayland: Use per-surface event queues</li>
+  <li>egl/wayland: Ensure we get a back buffer</li>
+</ul>
+
+<p>Emil Velikov (24):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.1.1</li>
+  <li>configure: move platform handling further up</li>
+  <li>configure: rename remaining HAVE_EGL_PLATFORM_* guards</li>
+  <li>configure: update remaining --with-egl-platforms references</li>
+  <li>configure: loosen --with-platforms heuristics</li>
+  <li>configure: enable the surfaceless platform by default</li>
+  <li>configure: set HAVE_foo_PLATFORM as applicable</li>
+  <li>configure: error out when building GLX w/o the X11 platform</li>
+  <li>configure: check once for DRI3 dependencies</li>
+  <li>loader: build libloader_dri3_helper.la only with HAVE_PLATFORM_X11</li>
+  <li>configure: error out when building X11 Vulkan without DRI3</li>
+  <li>auxiliary/vl: use vl_*_screen_create stubs when building w/o platform</li>
+  <li>st/va: fix misplaced closing bracket</li>
+  <li>st/omx: remove unneeded X11 include</li>
+  <li>st/omx: fix building against X11-less setups</li>
+  <li>gallium/targets: link against XCB only as needed</li>
+  <li>configure: error out if building VA w/o supported platform</li>
+  <li>configure: error out if building OMX w/o supported platform</li>
+  <li>configure: error out if building VDPAU w/o supported platform</li>
+  <li>configure: error out if building XVMC w/o supported platform</li>
+  <li>travis: remove workarounds for the Vulkan target</li>
+  <li>anv: automake: list shared libraries after the static ones</li>
+  <li>radv: automake: list shared libraries after the static ones</li>
+  <li>egl/wayland: select the format based on the interface used</li>
+</ul>
+
+<p>Ian Romanick (3):</p>
+<ul>
+  <li>r100: Don't assume that the base mipmap of a texture exists</li>
+  <li>r100,r200: Don't assume glVisual is non-NULL during context creation</li>
+  <li>r100: Use _mesa_get_format_base_format in radeon_update_wrapper</li>
+</ul>
+
+<p>Jason Ekstrand (17):</p>
+<ul>
+  <li>anv: Handle color layout transitions from the UNINITIALIZED layout</li>
+  <li>anv: Handle transitioning depth from UNDEFINED to other layouts</li>
+  <li>anv/image: Get rid of the memset(aux, 0, sizeof(aux)) hack</li>
+  <li>anv: Predicate 48bit support on gen &gt;= 8</li>
+  <li>anv: Set up memory types and heaps during physical device init</li>
+  <li>anv: Set image memory types based on the type count</li>
+  <li>i965/blorp: Do and end-of-pipe sync on both sides of fast-clear ops</li>
+  <li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
+  <li>anv: Set EXEC_OBJECT_ASYNC when available</li>
+  <li>anv: Determine the type of mapping based on type metadata</li>
+  <li>anv: Add valid_bufer_usage to the memory type metadata</li>
+  <li>anv: Stop setting BO flags in bo_init_new</li>
+  <li>anv: Make supports_48bit_addresses a heap property</li>
+  <li>anv: Refactor memory type setup</li>
+  <li>anv: Advertise both 32-bit and 48-bit heaps when we have enough memory</li>
+  <li>i965: Rework Sandy Bridge HiZ and stencil layouts</li>
+  <li>anv: Require vertex buffers to come from a 32-bit heap</li>
+</ul>
+
+<p>Juan A. Suarez Romero (13):</p>
+<ul>
+  <li>Revert "android: fix segfault within swap_buffers"</li>
+  <li>cherry-ignore: radeonsi: load patch_id for TES-as-ES when exporting for PS</li>
+  <li>cherry-ignore: anv: Determine the type of mapping based on type metadata</li>
+  <li>cherry-ignore: anv: Stop setting BO flags in bo_init_new</li>
+  <li>cherry-ignore: anv: Make supports_48bit_addresses a heap property</li>
+  <li>cherry-ignore: anv: Advertise both 32-bit and 48-bit heaps when we have enough memory</li>
+  <li>cherry-ignore: anv: Require vertex buffers to come from a 32-bit heap</li>
+  <li>cherry-ignore: radv: fix regression in descriptor set freeing</li>
+  <li>cherry-ignore: anv: Add valid_bufer_usage to the memory type metadata</li>
+  <li>cherry-ignore: anv: Refactor memory type setup</li>
+  <li>Revert "cherry-ignore: anv: [...]"</li>
+  <li>Revert "cherry-ignore: anv: Require vertex buffers to come from a 32-bit heap"</li>
+  <li>Update version to 17.1.2</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi/gfx9: compile shaders with +xnack</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>st/mesa: remove redundant stfb-&gt;iface checks</li>
+</ul>
+
+<p>Nicolas Boichat (1):</p>
+<ul>
+  <li>configure.ac: Also match -androideabi tuple</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: fix fence creation fail if no rendering</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>egl/android: fix segfault within swap_buffers</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>st/mesa: don't mark the program as in cache_fallback when there is cache miss</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,6 +219,10 @@ CHIPSET(0x6860, VEGA10_, VEGA10)
 CHIPSET(0x6861, VEGA10_, VEGA10)
 CHIPSET(0x6862, VEGA10_, VEGA10)
 CHIPSET(0x6863, VEGA10_, VEGA10)
+CHIPSET(0x6864, VEGA10_, VEGA10)
 CHIPSET(0x6867, VEGA10_, VEGA10)
+CHIPSET(0x6868, VEGA10_, VEGA10)
 CHIPSET(0x687F, VEGA10_, VEGA10)
 CHIPSET(0x686C, VEGA10_, VEGA10)
+
+CHIPSET(0x15DD, RAVEN_, RAVEN)
--- a/src/amd/Makefile.common.am
+++ b/src/amd/Makefile.common.am
@@ -65,6 +65,8 @@ common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
 endif
 endif

+common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
+
 common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h
 	$(AM_V_at)$(MKDIR_P) $(@D)
 	$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -1193,6 +1193,20 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
            m_settings.depthPipeXorDisable = 1;
            break;

+        case FAMILY_RV:
+            m_settings.isArcticIsland = 1;
+            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
+
+            if (m_settings.isRaven)
+            {
+                m_settings.isDcn1   = 1;
+            }
+
+            m_settings.metaBaseAlignFix = 1;
+
+            m_settings.depthPipeXorDisable = 1;
+            break;
+
        default:
            ADDR_ASSERT(!"This should be a Fusion");
            break;
@@ -2734,6 +2748,35 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
                break;
        }
    }
+    else if (m_settings.isDcn1)
+    {
+        switch (swizzleMode)
+        {
+            case ADDR_SW_4KB_D:
+            case ADDR_SW_64KB_D:
+            case ADDR_SW_VAR_D:
+            case ADDR_SW_64KB_D_T:
+            case ADDR_SW_4KB_D_X:
+            case ADDR_SW_64KB_D_X:
+            case ADDR_SW_VAR_D_X:
+                support = (pIn->bpp == 64);
+                break;
+
+            case ADDR_SW_LINEAR:
+            case ADDR_SW_4KB_S:
+            case ADDR_SW_64KB_S:
+            case ADDR_SW_VAR_S:
+            case ADDR_SW_64KB_S_T:
+            case ADDR_SW_4KB_S_X:
+            case ADDR_SW_64KB_S_X:
+            case ADDR_SW_VAR_S_X:
+                support = (pIn->bpp <= 64);
+                break;
+
+            default:
+                break;
+        }
+    }
    else
    {
        ADDR_NOT_IMPLEMENTED();
@@ -3195,6 +3238,20 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
                        // DCE12 does not support display surface to be _T swizzle mode
                        prtXor = FALSE;
                    }
+                    else if (m_settings.isDcn1)
+                    {
+                        // _R is not supported by Dcn1
+                        if (pIn->bpp == 64)
+                        {
+                            swType = ADDR_SW_D;
+                        }
+                        else
+                        {
+                            swType = ADDR_SW_S;
+                        }
+
+                        blockSet.micro = FALSE;
+                    }
                    else
                    {
                        ADDR_NOT_IMPLEMENTED();
--- a/src/amd/addrlib/gfx9/gfx9addrlib.h
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.h
@@ -54,11 +54,13 @@ struct Gfx9ChipSettings
        // Asic/Generation name
        UINT_32 isArcticIsland      : 1;
        UINT_32 isVega10            : 1;
-        UINT_32 reserved0           : 30;
+        UINT_32 isRaven             : 1;
+        UINT_32 reserved0           : 29;

        // Display engine IP version name
        UINT_32 isDce12             : 1;
-        UINT_32 reserved1           : 31;
+        UINT_32 isDcn1              : 1;
+        UINT_32 reserved1           : 29;

        // Misc configuration bits
        UINT_32 metaBaseAlignFix    : 1;
@@ -201,7 +203,7 @@ protected:

        if (IsXor(swizzleMode))
        {
-            if (m_settings.isVega10)
+            if (m_settings.isVega10 || m_settings.isRaven)
            {
                baseAlign = GetBlockSize(swizzleMode);
            }
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1230,6 +1230,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
 	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
 }

+static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
+			       LLVMValueRef src0)
+{
+	LLVMValueRef result;
+	LLVMValueRef cond;
+
+	src0 = to_float(ctx, src0);
+	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+	/* TODO SI/CIK options here */
+	if (ctx->options->chip_class >= VI) {
+		LLVMValueRef args[2];
+		/* Check if the result is a denormal - and flush to 0 if so. */
+		args[0] = result;
+		args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
+		cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
+	}
+
+	/* need to convert back up to f32 */
+	result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+
+	if (ctx->options->chip_class >= VI)
+		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
+
+	return result;
+}
+
 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
 				   LLVMValueRef src0, LLVMValueRef src1)
 {
@@ -1626,10 +1653,18 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 	case nir_op_fmax:
 		result = emit_intrin_2f_param(ctx, "llvm.maxnum",
 		                              to_float_type(ctx, def_type), src[0], src[1]);
+		if (instr->dest.dest.ssa.bit_size == 32)
+			result = emit_intrin_1f_param(ctx, "llvm.canonicalize",
+						      to_float_type(ctx, def_type),
+						      result);
 		break;
 	case nir_op_fmin:
 		result = emit_intrin_2f_param(ctx, "llvm.minnum",
 		                              to_float_type(ctx, def_type), src[0], src[1]);
+		if (instr->dest.dest.ssa.bit_size == 32)
+			result = emit_intrin_1f_param(ctx, "llvm.canonicalize",
+						      to_float_type(ctx, def_type),
+						      result);
 		break;
 	case nir_op_ffma:
 		result = emit_intrin_3f_param(ctx, "llvm.fma",
@@ -1717,10 +1752,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 		result = emit_b2f(ctx, src[0]);
 		break;
 	case nir_op_fquantize2f16:
-		src[0] = to_float(ctx, src[0]);
-		result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
-		/* need to convert back up to f32 */
-		result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+		result = emit_f2f16(ctx, src[0]);
 		break;
 	case nir_op_umul_high:
 		result = emit_umul_high(ctx, src[0], src[1]);
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -93,6 +93,7 @@ enum radeon_family {
    CHIP_POLARIS11,
    CHIP_POLARIS12,
    CHIP_VEGA10,
+    CHIP_RAVEN,
    CHIP_LAST,
 };

--- a/src/amd/common/amdgpu_id.h
+++ b/src/amd/common/amdgpu_id.h
@@ -49,6 +49,7 @@ enum {
 	FAMILY_CZ,
 	FAMILY_PI,
 	FAMILY_AI,
+	FAMILY_RV,
 	FAMILY_LAST,
 };

@@ -185,4 +186,13 @@ enum {
 #define ASICREV_IS_VEGA10_P(eChipRev) \
   ((eChipRev) >= AI_VEGA10_P_A0 && (eChipRev) < AI_UNKNOWN)

+/* RV specific rev IDs */
+enum {
+   RAVEN_A0      = 0x01,
+   RAVEN_UNKNOWN = 0xFF
+};
+
+#define ASICREV_IS_RAVEN(eChipRev) \
+   ((eChipRev) >= RAVEN_A0 && (eChipRev) < RAVEN_UNKNOWN)
+
 #endif /* AMDGPU_ID_H */
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9094,5 +9094,18 @@
 #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
 #define    CIK_SDMA_COPY_MAX_SIZE                  0x3fffe0

+enum amd_cmp_class_flags {
+	S_NAN = 1 << 0,        // Signaling NaN
+	Q_NAN = 1 << 1,        // Quiet NaN
+	N_INFINITY = 1 << 2,   // Negative infinity
+	N_NORMAL = 1 << 3,     // Negative normal
+	N_SUBNORMAL = 1 << 4,  // Negative subnormal
+	N_ZERO = 1 << 5,       // Negative zero
+	P_ZERO = 1 << 6,       // Positive zero
+	P_SUBNORMAL = 1 << 7,  // Positive subnormal
+	P_NORMAL = 1 << 8,     // Positive normal
+	P_INFINITY = 1 << 9    // Positive infinity
+};
+
 #endif /* _SID_H */

--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -59,8 +59,22 @@ VULKAN_SOURCES = \
 	$(VULKAN_GENERATED_FILES) \
 	$(VULKAN_FILES)

-VULKAN_LIB_DEPS =
-
+VULKAN_LIB_DEPS = \
+	libvulkan_common.la \
+	$(top_builddir)/src/vulkan/libvulkan_util.la \
+	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
+	$(top_builddir)/src/amd/common/libamd_common.la \
+	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
+	$(top_builddir)/src/compiler/nir/libnir.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(LLVM_LIBS) \
+	$(LIBELF_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(AMDGPU_LIBS) \
+	$(LIBDRM_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	-lm

 if HAVE_PLATFORM_X11
 AM_CPPFLAGS += \
@@ -70,8 +84,7 @@ AM_CPPFLAGS += \

 VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)

-# FIXME: Use pkg-config for X11-xcb ldflags.
-VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
+VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS)
 endif


@@ -89,23 +102,6 @@ endif
 noinst_LTLIBRARIES = libvulkan_common.la
 libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)

-VULKAN_LIB_DEPS += \
-	libvulkan_common.la \
-	$(top_builddir)/src/vulkan/libvulkan_util.la \
-	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
-	$(top_builddir)/src/amd/common/libamd_common.la \
-	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
-	$(top_builddir)/src/compiler/nir/libnir.la \
-	$(top_builddir)/src/util/libmesautil.la \
-	$(LLVM_LIBS) \
-	$(LIBELF_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(AMDGPU_LIBS) \
-	$(LIBDRM_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(DLOPEN_LIBS) \
-	-lm
-
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1334,6 +1334,10 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
 	if (cmd_buffer->state.push_descriptors_dirty)
 		radv_flush_push_descriptors(cmd_buffer);

+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+	                                                   cmd_buffer->cs,
+	                                                   MAX_SETS * MESA_SHADER_STAGES * 4);
+
 	for (i = 0; i < MAX_SETS; i++) {
 		if (!(cmd_buffer->state.descriptors_dirty & (1 << i)))
 			continue;
@@ -1345,6 +1349,7 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
 	}
 	cmd_buffer->state.descriptors_dirty = 0;
 	cmd_buffer->state.push_descriptors_dirty = false;
+	assert(cmd_buffer->cs->cdw <= cdw_max);
 }

 static void
@@ -1373,6 +1378,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 	va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
 	va += offset;

+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+	                                                   cmd_buffer->cs, MESA_SHADER_STAGES * 4);
 	if (stages & VK_SHADER_STAGE_VERTEX_BIT)
 		radv_emit_userdata_address(cmd_buffer, pipeline, MESA_SHADER_VERTEX,
 					   AC_UD_PUSH_CONSTANTS, va);
@@ -1398,6 +1405,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 					   AC_UD_PUSH_CONSTANTS, va);

 	cmd_buffer->push_constant_stages &= ~stages;
+	assert(cmd_buffer->cs->cdw <= cdw_max);
 }

 static void radv_emit_primitive_reset_state(struct radv_cmd_buffer *cmd_buffer,
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1049,6 +1049,22 @@ VkResult radv_CreateDevice(
 			break;
 		}
 		device->ws->cs_finalize(device->flush_cs[family]);
+
+		device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
+		switch (family) {
+		case RADV_QUEUE_GENERAL:
+		case RADV_QUEUE_COMPUTE:
+			si_cs_emit_cache_flush(device->flush_shader_cs[family],
+			                       device->physical_device->rad_info.chip_class,
+			                       family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
+					       family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
+			                       RADV_CMD_FLAG_INV_ICACHE |
+			                       RADV_CMD_FLAG_INV_SMEM_L1 |
+			                       RADV_CMD_FLAG_INV_VMEM_L1 |
+			                       RADV_CMD_FLAG_INV_GLOBAL_L2);
+			break;
+		}
+		device->ws->cs_finalize(device->flush_shader_cs[family]);
 	}

 	if (getenv("RADV_TRACE_FILE")) {
@@ -1124,6 +1140,8 @@ void radv_DestroyDevice(
 			device->ws->cs_destroy(device->empty_cs[i]);
 		if (device->flush_cs[i])
 			device->ws->cs_destroy(device->flush_cs[i]);
+		if (device->flush_shader_cs[i])
+			device->ws->cs_destroy(device->flush_shader_cs[i]);
 	}
 	radv_device_finish_meta(device);

@@ -1825,7 +1843,7 @@ VkResult radv_QueueSubmit(

 	for (uint32_t i = 0; i < submitCount; i++) {
 		struct radeon_winsys_cs **cs_array;
-		bool do_flush = !i;
+		bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
 		bool can_patch = !do_flush;
 		uint32_t advance;

@@ -1852,7 +1870,9 @@ VkResult radv_QueueSubmit(
 					        (pSubmits[i].commandBufferCount + do_flush));

 		if(do_flush)
-			cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
+			cs_array[0] = pSubmits[i].waitSemaphoreCount ?
+				queue->device->flush_shader_cs[queue->queue_family_index] :
+				queue->device->flush_cs[queue->queue_family_index];

 		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
 			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -597,13 +597,13 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 				tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
 			}
 		}
-		if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
+		if (tiled && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
 			tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
 			         VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
 		}
 	}

-	if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
+	if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
 		linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
 		          VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
 	}
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -693,6 +693,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 			   VK_IMAGE_ASPECT_STENCIL_BIT));
 	assert(pass_att != VK_ATTACHMENT_UNUSED);

+	if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
+		clear_value.depth = 1.0f;
+
 	const struct depthstencil_clear_vattrs vertex_data[3] = {
 		{
 			.position = {
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -495,7 +495,7 @@ struct radv_device {
 	int queue_count[RADV_MAX_QUEUE_FAMILIES];
 	struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
 	struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
-
+	struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
 	uint64_t debug_flags;

 	bool llvm_supports_spill;
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -460,16 +460,20 @@ VkResult radv_QueuePresentKHR(
 		RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
 		struct radeon_winsys_cs *cs;
 		const VkPresentRegionKHR *region = NULL;
+		VkResult item_result;

 		assert(radv_device_from_handle(swapchain->device) == queue->device);
 		if (swapchain->fences[0] == VK_NULL_HANDLE) {
-			result = radv_CreateFence(radv_device_to_handle(queue->device),
+			item_result = radv_CreateFence(radv_device_to_handle(queue->device),
 						  &(VkFenceCreateInfo) {
 							  .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
 								  .flags = 0,
 								  }, &swapchain->alloc, &swapchain->fences[0]);
-			if (result != VK_SUCCESS)
-				return result;
+			if (pPresentInfo->pResults != NULL)
+				pPresentInfo->pResults[i] = item_result;
+			result = result == VK_SUCCESS ? item_result : result;
+			if (item_result != VK_SUCCESS)
+				continue;
 		} else {
 			radv_ResetFences(radv_device_to_handle(queue->device),
 					 1, &swapchain->fences[0]);
@@ -493,12 +497,15 @@ VkResult radv_QueuePresentKHR(
 		if (regions && regions->pRegions)
 			region = &regions->pRegions[i];

-		result = swapchain->queue_present(swapchain,
+		item_result = swapchain->queue_present(swapchain,
 						  pPresentInfo->pImageIndices[i],
 						  region);
 		/* TODO: What if one of them returns OUT_OF_DATE? */
-		if (result != VK_SUCCESS)
-			return result;
+		if (pPresentInfo->pResults != NULL)
+			pPresentInfo->pResults[i] = item_result;
+		result = result == VK_SUCCESS ? item_result : result;
+		if (item_result != VK_SUCCESS)
+			continue;

 		VkFence last = swapchain->fences[2];
 		swapchain->fences[2] = swapchain->fences[1];
--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -1292,7 +1292,7 @@ shader_cache_read_program_metadata(struct gl_context *ctx,
      return false;

   struct disk_cache *cache = ctx->Cache;
-   if (!cache || prog->data->cache_fallback)
+   if (!cache || prog->data->cache_fallback || prog->data->skip_cache)
      return false;

   /* Include bindings when creating sha1. These bindings change the resulting
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -244,9 +244,9 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
   nir_ssa_def *yuv =
      nir_vec4(b,
               nir_fmul(b, nir_imm_float(b, 1.16438356f),
-                        nir_fadd(b, y, nir_imm_float(b, -0.0625f))),
-               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -0.5f)), 0),
-               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -0.5f)), 0),
+                        nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
+               nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
+               nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
               nir_imm_float(b, 0.0));

   nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -47,7 +47,6 @@ dri2_backend_FILES =
 dri3_backend_FILES =

 if HAVE_PLATFORM_X11
-AM_CFLAGS += -DHAVE_X11_PLATFORM
 AM_CFLAGS += $(XCB_DRI2_CFLAGS)
 libEGL_common_la_LIBADD += $(XCB_DRI2_LIBS)
 dri2_backend_FILES += drivers/dri2/platform_x11.c
@@ -62,7 +61,6 @@ endif
 endif

 if HAVE_PLATFORM_WAYLAND
-AM_CFLAGS += -DHAVE_WAYLAND_PLATFORM
 AM_CFLAGS += $(WAYLAND_CFLAGS)
 libEGL_common_la_LIBADD += $(WAYLAND_LIBS)
 libEGL_common_la_LIBADD += $(LIBDRM_LIBS)
@@ -70,19 +68,16 @@ libEGL_common_la_LIBADD += $(top_builddir)/src/egl/wayland/wayland-drm/libwaylan
 dri2_backend_FILES += drivers/dri2/platform_wayland.c
 endif

-if HAVE_EGL_PLATFORM_DRM
-AM_CFLAGS += -DHAVE_DRM_PLATFORM
+if HAVE_PLATFORM_DRM
 libEGL_common_la_LIBADD += $(top_builddir)/src/gbm/libgbm.la
 dri2_backend_FILES += drivers/dri2/platform_drm.c
 endif

-if HAVE_EGL_PLATFORM_SURFACELESS
-AM_CFLAGS += -DHAVE_SURFACELESS_PLATFORM
+if HAVE_PLATFORM_SURFACELESS
 dri2_backend_FILES += drivers/dri2/platform_surfaceless.c
 endif

-if HAVE_EGL_PLATFORM_ANDROID
-AM_CFLAGS += -DHAVE_ANDROID_PLATFORM
+if HAVE_PLATFORM_ANDROID
 AM_CFLAGS += $(ANDROID_CFLAGS)
 libEGL_common_la_LIBADD += $(ANDROID_LIBS)
 dri2_backend_FILES += drivers/dri2/platform_android.c
@@ -138,7 +133,8 @@ libEGL_mesa_la_SOURCES = \
 	main/eglglvnd.c \
 	main/egldispatchstubs.h \
 	main/egldispatchstubs.c \
-	g_egldispatchstubs.c
+	g_egldispatchstubs.c \
+	g_egldispatchstubs.h
 libEGL_mesa_la_LIBADD = libEGL_common.la
 libEGL_mesa_la_LDFLAGS = \
 	-no-undefined \
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -257,6 +257,10 @@ struct dri2_egl_surface
   struct wl_egl_window  *wl_win;
   int                    dx;
   int                    dy;
+   struct wl_event_queue *wl_queue;
+   struct wl_surface     *wl_surface_wrapper;
+   struct wl_display     *wl_dpy_wrapper;
+   struct wl_drm         *wl_drm_wrapper;
   struct wl_callback    *throttle_callback;
   int                    format;
 #endif
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -629,14 +629,11 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
         dri2_surf->color_buffers[i].age++;
   }

-   /* Make sure we have a back buffer in case we're swapping without
-    * ever rendering. */
-   if (get_back_bo(dri2_surf, 0) < 0) {
-      _eglError(EGL_BAD_ALLOC, "dri2_swap_buffers");
-      return EGL_FALSE;
-   }
-
-   dri2_surf->back->age = 1;
+   /* XXX: we don't use get_back_bo() since it causes regressions in
+    * several dEQP tests.
+    */
+   if (dri2_surf->back)
+      dri2_surf->back->age = 1;

   dri2_flush_drawable_for_swapbuffers(disp, draw);

--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -55,34 +55,10 @@ static EGLBoolean
 dri2_wl_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
                      EGLint interval);

-static void
-sync_callback(void *data, struct wl_callback *callback, uint32_t serial)
-{
-   int *done = data;
-
-   *done = 1;
-   wl_callback_destroy(callback);
-}
-
-static const struct wl_callback_listener sync_listener = {
-   .done = sync_callback
-};
-
 static int
 roundtrip(struct dri2_egl_display *dri2_dpy)
 {
-   struct wl_callback *callback;
-   int done = 0, ret = 0;
-
-   callback = wl_display_sync(dri2_dpy->wl_dpy_wrapper);
-   wl_callback_add_listener(callback, &sync_listener, &done);
-   while (ret != -1 && !done)
-      ret = wl_display_dispatch_queue(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
-
-   if (!done)
-      wl_callback_destroy(callback);
-
-   return ret;
+   return wl_display_roundtrip_queue(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
 }

 static void
@@ -148,7 +124,7 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   if (!_eglInitSurface(&dri2_surf->base, disp, EGL_WINDOW_BIT, conf, attrib_list))
      goto cleanup_surf;

-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->wl_drm) {
      if (conf->RedSize == 5)
         dri2_surf->format = WL_DRM_FORMAT_RGB565;
      else if (conf->AlphaSize == 0)
@@ -156,6 +132,7 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
      else
         dri2_surf->format = WL_DRM_FORMAT_ARGB8888;
   } else {
+      assert(dri2_dpy->wl_shm);
      if (conf->RedSize == 5)
         dri2_surf->format = WL_SHM_FORMAT_RGB565;
      else if (conf->AlphaSize == 0)
@@ -170,6 +147,37 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   }

   dri2_surf->wl_win = window;
+   dri2_surf->wl_queue = wl_display_create_queue(dri2_dpy->wl_dpy);
+   if (!dri2_surf->wl_queue) {
+      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+      goto cleanup_surf;
+   }
+
+   if (dri2_dpy->wl_drm) {
+      dri2_surf->wl_drm_wrapper = wl_proxy_create_wrapper(dri2_dpy->wl_drm);
+      if (!dri2_surf->wl_drm_wrapper) {
+         _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+         goto cleanup_queue;
+      }
+      wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_drm_wrapper,
+                         dri2_surf->wl_queue);
+   }
+
+   dri2_surf->wl_dpy_wrapper = wl_proxy_create_wrapper(dri2_dpy->wl_dpy);
+   if (!dri2_surf->wl_dpy_wrapper) {
+      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+      goto cleanup_drm;
+   }
+   wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_dpy_wrapper,
+                      dri2_surf->wl_queue);
+
+   dri2_surf->wl_surface_wrapper = wl_proxy_create_wrapper(window->surface);
+   if (!dri2_surf->wl_surface_wrapper) {
+      _eglError(EGL_BAD_ALLOC, "dri2_create_surface");
+      goto cleanup_dpy_wrapper;
+   }
+   wl_proxy_set_queue((struct wl_proxy *)dri2_surf->wl_surface_wrapper,
+                      dri2_surf->wl_queue);

   dri2_surf->wl_win->private = dri2_surf;
   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
@@ -200,6 +208,15 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,

   return &dri2_surf->base;

+   /* Failure unwinding: labels release resources in reverse order of
+    * creation, so jump to the label for the last object that was created. */
+ cleanup_dpy_wrapper:
+   wl_proxy_wrapper_destroy(dri2_surf->wl_dpy_wrapper);
+ cleanup_drm:
+   if (dri2_surf->wl_drm_wrapper)
+      wl_proxy_wrapper_destroy(dri2_surf->wl_drm_wrapper);
+ cleanup_queue:
+   wl_event_queue_destroy(dri2_surf->wl_queue);
 cleanup_surf:
   free(dri2_surf);

@@ -265,6 +278,12 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
      dri2_surf->wl_win->destroy_window_callback = NULL;
   }

+   if (dri2_surf->wl_drm_wrapper)
+      wl_proxy_wrapper_destroy(dri2_surf->wl_drm_wrapper);
+   wl_proxy_wrapper_destroy(dri2_surf->wl_surface_wrapper);
+   wl_proxy_wrapper_destroy(dri2_surf->wl_dpy_wrapper);
+   wl_event_queue_destroy(dri2_surf->wl_queue);
+
   free(surf);

   return EGL_TRUE;
@@ -333,9 +352,9 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
   }

   /* There might be a buffer release already queued that wasn't processed */
-   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
+   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_surf->wl_queue);

-   if (dri2_surf->back == NULL) {
+   while (dri2_surf->back == NULL) {
      for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
         /* Get an unlocked buffer, preferrably one with a dri_buffer
          * already allocated. */
@@ -346,6 +365,14 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
         else if (dri2_surf->back->dri_image == NULL)
            dri2_surf->back = &dri2_surf->color_buffers[i];
      }
+
+      if (dri2_surf->back)
+         break;
+
+      /* If we don't have a buffer, then block on the server to release one for
+       * us, and try again. */
+      if (wl_display_dispatch_queue(dri2_dpy->wl_dpy, dri2_surf->wl_queue) < 0)
+          return -1;
   }

   if (dri2_surf->back == NULL)
@@ -634,7 +661,7 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf)
      dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);

      dri2_surf->current->wl_buffer =
-         wl_drm_create_prime_buffer(dri2_dpy->wl_drm,
+         wl_drm_create_prime_buffer(dri2_surf->wl_drm_wrapper,
                                    fd,
                                    dri2_surf->base.Width,
                                    dri2_surf->base.Height,
@@ -648,7 +675,7 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf)
      dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);

      dri2_surf->current->wl_buffer =
-         wl_drm_create_buffer(dri2_dpy->wl_drm,
+         wl_drm_create_buffer(dri2_surf->wl_drm_wrapper,
                              name,
                              dri2_surf->base.Width,
                              dri2_surf->base.Height,
@@ -656,8 +683,6 @@ create_wl_buffer(struct dri2_egl_surface *dri2_surf)
                              dri2_surf->format);
   }

-   wl_proxy_set_queue((struct wl_proxy *) dri2_surf->current->wl_buffer,
-                      dri2_dpy->wl_queue);
   wl_buffer_add_listener(dri2_surf->current->wl_buffer,
                          &wl_buffer_listener, dri2_surf);
 }
@@ -669,14 +694,14 @@ try_damage_buffer(struct dri2_egl_surface *dri2_surf,
 {
   int i;

-   if (wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_win->surface)
+   if (wl_proxy_get_version((struct wl_proxy *) dri2_surf->wl_surface_wrapper)
       < WL_SURFACE_DAMAGE_BUFFER_SINCE_VERSION)
      return EGL_FALSE;

   for (i = 0; i < n_rects; i++) {
      const int *rect = &rects[i * 4];

-      wl_surface_damage_buffer(dri2_surf->wl_win->surface,
+      wl_surface_damage_buffer(dri2_surf->wl_surface_wrapper,
                               rect[0],
                               dri2_surf->base.Height - rect[1] - rect[3],
                               rect[2], rect[3]);
@@ -699,7 +724,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,

   while (dri2_surf->throttle_callback != NULL)
      if (wl_display_dispatch_queue(dri2_dpy->wl_dpy,
-                                    dri2_dpy->wl_queue) == -1)
+                                    dri2_surf->wl_queue) == -1)
         return -1;

   for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++)
@@ -715,11 +740,9 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,

   if (draw->SwapInterval > 0) {
      dri2_surf->throttle_callback =
-         wl_surface_frame(dri2_surf->wl_win->surface);
+         wl_surface_frame(dri2_surf->wl_surface_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
-      wl_proxy_set_queue((struct wl_proxy *) dri2_surf->throttle_callback,
-                         dri2_dpy->wl_queue);
   }

   dri2_surf->back->age = 1;
@@ -728,7 +751,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,

   create_wl_buffer(dri2_surf);

-   wl_surface_attach(dri2_surf->wl_win->surface,
+   wl_surface_attach(dri2_surf->wl_surface_wrapper,
                     dri2_surf->current->wl_buffer,
                     dri2_surf->dx, dri2_surf->dy);

@@ -742,7 +765,7 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
    * ignore the damage region and post maximum damage, due to
    * https://bugs.freedesktop.org/78190 */
   if (!n_rects || !try_damage_buffer(dri2_surf, rects, n_rects))
-      wl_surface_damage(dri2_surf->wl_win->surface,
+      wl_surface_damage(dri2_surf->wl_surface_wrapper,
                        0, 0, INT32_MAX, INT32_MAX);

   if (dri2_dpy->is_different_gpu) {
@@ -760,14 +783,14 @@ dri2_wl_swap_buffers_with_damage(_EGLDriver *drv,
   dri2_flush_drawable_for_swapbuffers(disp, draw);
   dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);

-   wl_surface_commit(dri2_surf->wl_win->surface);
+   wl_surface_commit(dri2_surf->wl_surface_wrapper);

   /* If we're not waiting for a frame callback then we'll at least throttle
    * to a sync callback so that we always give a chance for the compositor to
    * handle the commit and send a release event before checking for a free
    * buffer */
   if (dri2_surf->throttle_callback == NULL) {
-      dri2_surf->throttle_callback = wl_display_sync(dri2_dpy->wl_dpy_wrapper);
+      dri2_surf->throttle_callback = wl_display_sync(dri2_surf->wl_dpy_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
   }
@@ -1410,11 +1433,13 @@ os_create_anonymous_file(off_t size)


 static EGLBoolean
-dri2_wl_swrast_allocate_buffer(struct dri2_egl_display *dri2_dpy,
+dri2_wl_swrast_allocate_buffer(struct dri2_egl_surface *dri2_surf,
                               int format, int w, int h,
                               void **data, int *size,
                               struct wl_buffer **buffer)
 {
+   struct dri2_egl_display *dri2_dpy =
+      dri2_egl_display(dri2_surf->base.Resource.Display);
   struct wl_shm_pool *pool;
   int fd, stride, size_map;
   void *data_map;
@@ -1435,6 +1460,7 @@ dri2_wl_swrast_allocate_buffer(struct dri2_egl_display *dri2_dpy,

   /* Share it in a wl_buffer */
   pool = wl_shm_create_pool(dri2_dpy->wl_shm, fd, size_map);
+   wl_proxy_set_queue((struct wl_proxy *)pool, dri2_surf->wl_queue);
   *buffer = wl_shm_pool_create_buffer(pool, 0, w, h, stride, format);
   wl_shm_pool_destroy(pool);
   close(fd);
@@ -1470,7 +1496,7 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
   /* find back buffer */

   /* There might be a buffer release already queued that wasn't processed */
-   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_dpy->wl_queue);
+   wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_surf->wl_queue);

   /* try get free buffer already created */
   for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
@@ -1486,7 +1512,7 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
      for (i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
         if (!dri2_surf->color_buffers[i].locked) {
             dri2_surf->back = &dri2_surf->color_buffers[i];
-             if (!dri2_wl_swrast_allocate_buffer(dri2_dpy,
+             if (!dri2_wl_swrast_allocate_buffer(dri2_surf,
                                                 dri2_surf->format,
                                                 dri2_surf->base.Width,
                                                 dri2_surf->base.Height,
@@ -1496,8 +1522,6 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
                _eglError(EGL_BAD_ALLOC, "failed to allocate color buffer");
                 return -1;
             }
-             wl_proxy_set_queue((struct wl_proxy *) dri2_surf->back->wl_buffer,
-                                dri2_dpy->wl_queue);
             wl_buffer_add_listener(dri2_surf->back->wl_buffer,
                                    &wl_buffer_listener, dri2_surf);
             break;
@@ -1553,22 +1577,20 @@ dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf)

   while (dri2_surf->throttle_callback != NULL)
      if (wl_display_dispatch_queue(dri2_dpy->wl_dpy,
-                                    dri2_dpy->wl_queue) == -1)
+                                    dri2_surf->wl_queue) == -1)
         return;

   if (dri2_surf->base.SwapInterval > 0) {
      dri2_surf->throttle_callback =
-         wl_surface_frame(dri2_surf->wl_win->surface);
+         wl_surface_frame(dri2_surf->wl_surface_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
-      wl_proxy_set_queue((struct wl_proxy *) dri2_surf->throttle_callback,
-                         dri2_dpy->wl_queue);
   }

   dri2_surf->current = dri2_surf->back;
   dri2_surf->back = NULL;

-   wl_surface_attach(dri2_surf->wl_win->surface,
+   wl_surface_attach(dri2_surf->wl_surface_wrapper,
                     dri2_surf->current->wl_buffer,
                     dri2_surf->dx, dri2_surf->dy);

@@ -1578,9 +1600,9 @@ dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf)
   dri2_surf->dx = 0;
   dri2_surf->dy = 0;

-   wl_surface_damage(dri2_surf->wl_win->surface,
+   wl_surface_damage(dri2_surf->wl_surface_wrapper,
                     0, 0, INT32_MAX, INT32_MAX);
-   wl_surface_commit(dri2_surf->wl_win->surface);
+   wl_surface_commit(dri2_surf->wl_surface_wrapper);

   /* If we're not waiting for a frame callback then we'll at least throttle
    * to a sync callback so that we always give a chance for the compositor to
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -828,25 +828,6 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,
         RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
   }

-   _EGLThreadInfo *t =_eglGetCurrentThread();
-   _EGLContext *old_ctx = t->CurrentContext;
-   _EGLSurface *old_draw_surf = old_ctx ? old_ctx->DrawSurface : NULL;
-   _EGLSurface *old_read_surf = old_ctx ? old_ctx->ReadSurface : NULL;
-
-   /* From the EGL 1.5 spec, Section 3.7.3 Binding Context and Drawables:
-    *
-    *    If the previous context of the calling thread has unflushed commands,
-    *    and the previous surface is no longer valid, an
-    *    EGL_BAD_CURRENT_SURFACE error is generated.
-    *
-    * It's difficult to check if the context has unflushed commands, but it's
-    * easy to check if the surface is no longer valid.
-    */
-   if (old_draw_surf && old_draw_surf->Lost)
-      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
-   if (old_read_surf && old_read_surf->Lost)
-      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
-
   /*    If a native window underlying either draw or read is no longer valid,
    *    an EGL_BAD_NATIVE_WINDOW error is generated.
    */
--- a/src/egl/main/egldisplay.c
+++ b/src/egl/main/egldisplay.c
@@ -61,7 +61,7 @@


 /**
- * Map --with-egl-platforms names to platform types.
+ * Map --with-platforms names to platform types.
 */
 static const struct {
   _EGLPlatformType platform;
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -72,6 +72,7 @@ if NEED_GALLIUM_VL
 COMMON_VL_CFLAGS = \
 	$(AM_CFLAGS) \
 	$(VL_CFLAGS) \
+	$(XCB_DRI3_CFLAGS) \
 	$(DRI2PROTO_CFLAGS) \
 	$(LIBDRM_CFLAGS)

@@ -87,22 +88,23 @@ if NEED_GALLIUM_VL_WINSYS

 noinst_LTLIBRARIES += libgalliumvlwinsys.la

-libgalliumvlwinsys_la_CFLAGS = \
-	$(COMMON_VL_CFLAGS)
+libgalliumvlwinsys_la_CFLAGS = $(COMMON_VL_CFLAGS)
+libgalliumvlwinsys_la_SOURCES = $(VL_WINSYS_SOURCES)

-libgalliumvlwinsys_la_SOURCES = \
-	$(VL_WINSYS_SOURCES)
+if HAVE_PLATFORM_X11
+libgalliumvlwinsys_la_SOURCES += $(VL_WINSYS_DRI2_SOURCES)

 if HAVE_DRI3
-
-libgalliumvlwinsys_la_SOURCES += \
-	$(VL_WINSYS_DRI3_SOURCES)
-
+libgalliumvlwinsys_la_SOURCES += $(VL_WINSYS_DRI3_SOURCES)
+endif
 endif

+if HAVE_PLATFORM_DRM
+libgalliumvlwinsys_la_SOURCES += $(VL_WINSYS_DRM_SOURCES)
 endif

-endif
+endif # NEED_GALLIUM_VL_WINSYS
+endif # NEED_GALLIUM_VL

 EXTRA_DIST = \
 	SConscript \
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -345,18 +345,22 @@ VL_SOURCES := \
 	vl/vl_video_buffer.c \
 	vl/vl_video_buffer.h \
 	vl/vl_vlc.h \
-	vl/vl_winsys.h \
 	vl/vl_zscan.c \
 	vl/vl_zscan.h

 # XXX: Nuke this as our dri targets no longer depend on VL.
 VL_WINSYS_SOURCES := \
-	vl/vl_winsys_dri.c \
-	vl/vl_winsys_drm.c
+	vl/vl_winsys.h
+
+VL_WINSYS_DRI2_SOURCES := \
+	vl/vl_winsys_dri.c

 VL_WINSYS_DRI3_SOURCES := \
 	vl/vl_winsys_dri3.c

+VL_WINSYS_DRM_SOURCES := \
+	vl/vl_winsys_drm.c
+
 VL_STUB_SOURCES := \
 	vl/vl_stubs.c

--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -125,19 +125,6 @@ create_pass_manager(struct gallivm_state *gallivm)
   LLVMAddTargetData(gallivm->target, gallivm->passmgr);
 #endif

-   /* Setting the module's DataLayout to an empty string will cause the
-    * ExecutionEngine to copy to the DataLayout string from its target
-    * machine to the module.  As of LLVM 3.8 the module and the execution
-    * engine are required to have the same DataLayout.
-    *
-    * TODO: This is just a temporary work-around.  The correct solution is
-    * for gallivm_init_state() to create a TargetMachine and pull the
-    * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
-    * is being implicitly created by the EngineBuilder in
-    * lp_build_create_jit_compiler_for_module()
-    */
-
-#if HAVE_LLVM < 0x0308
   {
      char *td_str;
      // New ones from the Module.
@@ -145,9 +132,6 @@ create_pass_manager(struct gallivm_state *gallivm)
      LLVMSetDataLayout(gallivm->module, td_str);
      free(td_str);
   }
-#else
-   LLVMSetDataLayout(gallivm->module, "");
-#endif

   if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
      /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
@@ -628,6 +612,24 @@ gallivm_compile_module(struct gallivm_state *gallivm)
   }

   if (use_mcjit) {
+      /* Setting the module's DataLayout to an empty string will cause the
+       * ExecutionEngine to copy the DataLayout string from its target
+       * machine to the module.  As of LLVM 3.8 the module and the execution
+       * engine are required to have the same DataLayout.
+       *
+       * We must make sure we do this after running the optimization passes,
+       * because those passes need a correct datalayout string.  For example,
+       * if those optimization passes see an empty datalayout, they will assume
+       * this is a little endian target and will do optimizations that break big
+       * endian machines.
+       *
+       * TODO: This is just a temporary work-around.  The correct solution is
+       * for gallivm_init_state() to create a TargetMachine and pull the
+       * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
+       * is being implicitly created by the EngineBuilder in
+       * lp_build_create_jit_compiler_for_module()
+       */
+      LLVMSetDataLayout(gallivm->module, "");
      assert(!gallivm->engine);
      if (!init_gallivm_engine(gallivm)) {
         assert(0);
--- a/src/gallium/auxiliary/renderonly/renderonly.c
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -117,6 +117,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
   }

   /* import dumb buffer */
+   memset(&handle, 0, sizeof(handle));
   handle.type = DRM_API_HANDLE_TYPE_FD;
   handle.handle = prime_fd;
   handle.stride = create_dumb.pitch;
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -32,7 +32,9 @@
 #ifndef vl_winsys_h
 #define vl_winsys_h

+#ifdef HAVE_X11_PLATFORM
 #include <X11/Xlib.h>
+#endif
 #include "pipe/p_defines.h"
 #include "pipe/p_format.h"

@@ -68,15 +70,31 @@ struct vl_screen
   struct pipe_loader_device *dev;
 };

+#ifdef HAVE_X11_PLATFORM
 struct vl_screen *
 vl_dri2_screen_create(Display *display, int screen);
+#else
+/* Stub used when HAVE_X11_PLATFORM is not defined: always returns NULL. */
+static inline struct vl_screen *
+vl_dri2_screen_create(void *display, int screen) { return NULL; }
+#endif

-struct vl_screen *
-vl_drm_screen_create(int fd);
-
-#if defined(HAVE_DRI3)
+#if defined(HAVE_X11_PLATFORM) && defined(HAVE_DRI3)
 struct vl_screen *
 vl_dri3_screen_create(Display *display, int screen);
+#else
+/* Stub used unless both X11 and DRI3 are enabled: always returns NULL. */
+static inline struct vl_screen *
+vl_dri3_screen_create(void *display, int screen) { return NULL; }
+#endif
+
+#ifdef HAVE_DRM_PLATFORM
+struct vl_screen *
+vl_drm_screen_create(int fd);
+#else
+/* Stub used when HAVE_DRM_PLATFORM is not defined: always returns NULL. */
+static inline struct vl_screen *
+vl_drm_screen_create(int fd) { return NULL; }
 #endif

 #endif
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -180,7 +180,7 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
                        &paddingY, &halign);
   assert(paddingX && paddingY);

-   if (templat->bind != PIPE_BUFFER) {
+   if (templat->target != PIPE_BUFFER) {
      unsigned min_paddingY = 4 * screen->specs.pixel_pipes;
      if (paddingY < min_paddingY)
         paddingY = min_paddingY;
--- a/src/gallium/drivers/etnaviv/etnaviv_translate.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_translate.h
@@ -416,6 +416,8 @@ translate_clear_color(enum pipe_format format,
   switch (format) {
   case PIPE_FORMAT_B8G8R8A8_UNORM:
   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
      clear_value = etna_cfloat_to_uintN(color->f[2], 8) |
                    (etna_cfloat_to_uintN(color->f[1], 8) << 8) |
                    (etna_cfloat_to_uintN(color->f[0], 8) << 16) |
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -278,13 +278,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
 		goto fail;
 	pctx->const_uploader = pctx->stream_uploader;

-	/* TODO what about compute?  Ideally it creates it's own independent
-	 * batches per compute job (since it isn't using tiling, so no point
-	 * in getting involved with the re-ordering madness)..
-	 */
-	if (!screen->reorder) {
-		ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx);
-	}
+	ctx->batch = fd_bc_alloc_batch(&screen->batch_cache, ctx);

 	slab_create_child(&ctx->transfer_pool, &screen->transfer_pool);

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -642,6 +642,8 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
   for (int s = 0; i->srcExists(s); ++s) {
      if (s == 2 && i->op == OP_SUCLAMP)
         continue;
+      if (s == 1 && i->op == OP_SHLADD)
+         continue;
      ImmediateValue *imm = i->getSrc(s)->asImm();
      if (imm) {
         if (i->op == OP_SELP && s == 2) {
--- a/src/gallium/drivers/radeon/Makefile.am
+++ b/src/gallium/drivers/radeon/Makefile.am
@@ -16,13 +16,11 @@ libradeon_la_SOURCES = \
 if HAVE_GALLIUM_LLVM

 AM_CFLAGS += \
-	$(LLVM_CFLAGS) \
-	$(LIBELF_CFLAGS)
+	$(LLVM_CFLAGS)

 libradeon_la_LIBADD = \
 	$(CLOCK_LIB) \
-	$(LLVM_LIBS) \
-	$(LIBELF_LIBS)
+	$(LLVM_LIBS)

 libradeon_la_LDFLAGS = \
 	$(LLVM_LDFLAGS)
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -833,6 +833,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
 	case CHIP_POLARIS12: return "AMD POLARIS12";
 	case CHIP_STONEY: return "AMD STONEY";
 	case CHIP_VEGA10: return "AMD VEGA10";
+	case CHIP_RAVEN: return "AMD RAVEN";
 	default: return "AMD unknown";
 	}
 }
@@ -1006,6 +1007,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_POLARIS12: /* same as polaris11 */
 		return HAVE_LLVM >= 0x0309 ? "polaris11" : "carrizo";
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		return "gfx900";
 	default:
 		return "";
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2720,8 +2720,15 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,

 			vi_dcc_clear_level(rctx, tex, 0, reset_value);

-			if (clear_words_needed)
-				tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+			unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level;
+			if (clear_words_needed) {
+				bool need_compressed_update = !tex->dirty_level_mask;
+
+				tex->dirty_level_mask |= level_bit;
+
+				if (need_compressed_update)
+					p_atomic_inc(&rctx->screen->compressed_colortex_counter);
+			}
 			tex->separate_dcc_dirty = true;
 		} else {
 			/* 128-bit formats are unusupported */
@@ -2744,7 +2751,12 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 					   tex->cmask.offset, tex->cmask.size, 0,
 					   R600_COHERENCY_CB_META);

+			bool need_compressed_update = !tex->dirty_level_mask;
+
 			tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
+
+			if (need_compressed_update)
+				p_atomic_inc(&rctx->screen->compressed_colortex_counter);
 		}

 		/* We can change the micro tile mode before a full clear. */
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -33,9 +33,6 @@
 #include "vl/vl_decoder.h"
 #include "../ddebug/dd_util.h"

-#define SI_LLVM_DEFAULT_FEATURES \
-	"+DumpCode,+vgpr-spilling,-fp32-denormals,-xnack"
-
 /*
 * pipe_context
 */
@@ -126,12 +123,16 @@ static LLVMTargetMachineRef
 si_create_llvm_target_machine(struct si_screen *sscreen)
 {
 	const char *triple = "amdgcn--";
+	char features[256];
+
+	snprintf(features, sizeof(features),
+		 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s",
+		 sscreen->b.chip_class >= GFX9 ? ",+xnack" : ",-xnack",
+		 sscreen->b.debug_flags & DBG_SI_SCHED ? ",+si-scheduler" : "");

 	return LLVMCreateTargetMachine(si_llvm_get_amdgpu_target(triple), triple,
 				       r600_get_llvm_processor_name(sscreen->b.family),
-				       sscreen->b.debug_flags & DBG_SI_SCHED ?
-					       SI_LLVM_DEFAULT_FEATURES ",+si-scheduler" :
-					       SI_LLVM_DEFAULT_FEATURES,
+				       features,
 				       LLVMCodeGenLevelDefault,
 				       LLVMRelocDefault,
 				       LLVMCodeModelDefault);
@@ -759,6 +760,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		sscreen->gs_table_depth = 32;
 		return true;
 	default:
@@ -897,7 +899,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)

 	sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
 					    sscreen->b.family <= CHIP_POLARIS12) ||
-					   sscreen->b.family == CHIP_VEGA10;
+					   sscreen->b.family == CHIP_VEGA10 ||
+					   sscreen->b.family == CHIP_RAVEN;

 	sscreen->b.has_cp_dma = true;
 	sscreen->b.has_streamout = true;
@@ -911,7 +914,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)

 		sscreen->b.rbplus_allowed =
 			!(sscreen->b.debug_flags & DBG_NO_RB_PLUS) &&
-			sscreen->b.family == CHIP_STONEY;
+			(sscreen->b.family == CHIP_STONEY ||
+			 sscreen->b.family == CHIP_RAVEN);
 	}

 	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -4568,6 +4568,9 @@ static void si_init_config(struct si_context *sctx)
 		case CHIP_VEGA10:
 			pc_lines = 4096;
 			break;
+		case CHIP_RAVEN:
+			pc_lines = 1024;
+			break;
 		default:
 			assert(0);
 		}
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -174,6 +174,20 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 	if (sctx->b.chip_class == SI) {
 		unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
 		*num_patches = MIN2(*num_patches, one_wave);
+
+		if (sctx->screen->b.info.max_se == 1) {
+			/* The VGT HS block increments the patch ID unconditionally
+			 * within a single threadgroup. This results in incorrect
+			 * patch IDs when instanced draws are used.
+			 *
+			 * The intended solution is to restrict threadgroups to
+			 * a single instance by setting SWITCH_ON_EOI, which
+			 * should cause IA to split instances up. However, this
+			 * doesn't work correctly on SI when there is no other
+			 * SE to switch to.
+			 */
+			*num_patches = 1;
+		}
 	}

 	sctx->last_num_patches = *num_patches;
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1813,6 +1813,19 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 	r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
 }

+static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
+{
+	sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
+		(sctx->tes_shader.cso &&
+		 sctx->tes_shader.cso->info.uses_primid) ||
+		(sctx->tcs_shader.cso &&
+		 sctx->tcs_shader.cso->info.uses_primid) ||
+		(sctx->gs_shader.cso &&
+		 sctx->gs_shader.cso->info.uses_primid) ||
+		(sctx->ps_shader.cso && !sctx->gs_shader.cso &&
+		 sctx->ps_shader.cso->info.uses_primid);
+}
+
 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -1829,20 +1842,14 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 	si_mark_atom_dirty(sctx, &sctx->clip_regs);
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */

-	if (enable_changed)
+	if (enable_changed) {
 		si_shader_change_notify(sctx);
+		if (sctx->ia_multi_vgt_param_key.u.uses_tess)
+			si_update_tcs_tes_uses_prim_id(sctx);
+	}
 	r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
 }

-static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
-{
-	sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
-		(sctx->tes_shader.cso &&
-		 sctx->tes_shader.cso->info.uses_primid) ||
-		(sctx->tcs_shader.cso &&
-		 sctx->tcs_shader.cso->info.uses_primid);
-}
-
 static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
@@ -1897,6 +1904,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	sctx->ps_shader.cso = sel;
 	sctx->ps_shader.current = sel ? sel->first_variant : NULL;
 	sctx->do_update_shaders = true;
+	if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
+		si_update_tcs_tes_uses_prim_id(sctx);
 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 }

--- a/src/gallium/drivers/swr/Makefile.am
+++ b/src/gallium/drivers/swr/Makefile.am
@@ -56,6 +56,7 @@ BUILT_SOURCES = \
 	rasterizer/codegen/gen_knobs.cpp \
 	rasterizer/codegen/gen_knobs.h \
 	rasterizer/jitter/gen_state_llvm.h \
+	rasterizer/jitter/gen_builder.hpp \
 	rasterizer/jitter/gen_builder_x86.hpp \
 	rasterizer/archrast/gen_ar_event.hpp \
 	rasterizer/archrast/gen_ar_event.cpp \
@@ -168,20 +169,6 @@ COMMON_LDFLAGS = \
 	$(LLVM_LDFLAGS)


-# XXX: As we cannot use BUILT_SOURCES (the files will end up in the dist
-# tarball) just annotate the dependency directly.
-# As the single direct user of gen_builder.hpp is a header (builder.h) trace all
-# the translusive users (one that use the latter header).
-rasterizer/jitter/blend_jit.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/builder.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/builder_misc.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/fetch_jit.cpp: rasterizer/jitter/gen_builder.hpp
-rasterizer/jitter/streamout_jit.cpp: rasterizer/jitter/gen_builder.hpp
-swr_shader.cpp: rasterizer/jitter/gen_builder.hpp
-
-CLEANFILES = \
-	rasterizer/jitter/gen_builder.hpp
-
 lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la

 libswrAVX_la_CXXFLAGS = \
@@ -192,14 +179,6 @@ libswrAVX_la_CXXFLAGS = \
 libswrAVX_la_SOURCES = \
 	$(COMMON_SOURCES)

-# XXX: Don't ship these generated sources for now, since they are specific
-# to the LLVM version they are generated from. Thus a release tarball
-# containing the said files, generated against eg. LLVM 3.8 will fail to build
-# on systems with other versions of LLVM eg. 3.7 or 3.6.
-# Move these back to BUILT_SOURCES once that is resolved.
-nodist_libswrAVX_la_SOURCES = \
-	rasterizer/jitter/gen_builder.hpp
-
 libswrAVX_la_LIBADD = \
 	$(COMMON_LIBADD)

@@ -214,14 +193,6 @@ libswrAVX2_la_CXXFLAGS = \
 libswrAVX2_la_SOURCES = \
 	$(COMMON_SOURCES)

-# XXX: Don't ship these generated sources for now, since they are specific
-# to the LLVM version they are generated from. Thus a release tarball
-# containing the said files, generated against eg. LLVM 3.8 will fail to build
-# on systems with other versions of LLVM eg. 3.7 or 3.6.
-# Move these back to BUILT_SOURCES once that is resolved.
-nodist_libswrAVX2_la_SOURCES = \
-	rasterizer/jitter/gen_builder.hpp
-
 libswrAVX2_la_LIBADD = \
 	$(COMMON_LIBADD)

@@ -230,6 +201,16 @@ libswrAVX2_la_LDFLAGS = \

 include $(top_srcdir)/install-gallium-links.mk

+# Generated gen_builder.hpp is not backwards compatible. So ship only one
+# created with the oldest supported version of LLVM.
+dist-hook:
+if SWR_INVALID_LLVM_VERSION
+	@echo "*******************************************************"
+	@echo "LLVM 3.9.0 or LLVM 3.9.1 required to create the tarball"
+	@echo "*******************************************************"
+	@test
+endif
+
 EXTRA_DIST = \
 	SConscript \
 	rasterizer/archrast/events.proto \
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.h
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
            }
        }
    }
+
+    //////////////////////////////////////////////////////////////////////////
+    /// @brief Resolves an 8x8 raster tile to the resolve destination surface.
+    /// @param pSrc - Pointer to raster tile.
+    /// @param pDstSurface - Destination surface state
+    /// @param x, y - Coordinates to raster tile.
+    /// @param sampleOffset - Offset between adjacent multisamples
+    INLINE static void Resolve(
+        uint8_t *pSrc,
+        SWR_SURFACE_STATE* pDstSurface,
+        uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
+    {
+        uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
+        uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
+
+        float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
+
+        // For each raster tile pixel (rx, ry)
+        for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
+        {
+            for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
+            {
+                // Perform bounds checking.
+                if (((x + rx) < lodWidth) &&
+                        ((y + ry) < lodHeight))
+                {
+                    // Sum across samples
+                    float resolveColor[4] = {0};
+                    for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
+                    {
+                        float sampleColor[4] = {0};
+                        uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
+                        GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
+                        resolveColor[0] += sampleColor[0];
+                        resolveColor[1] += sampleColor[1];
+                        resolveColor[2] += sampleColor[2];
+                        resolveColor[3] += sampleColor[3];
+                    }
+
+                    // Divide by numSamples to average
+                    resolveColor[0] *= oneOverNumSamples;
+                    resolveColor[1] *= oneOverNumSamples;
+                    resolveColor[2] *= oneOverNumSamples;
+                    resolveColor[3] *= oneOverNumSamples;
+
+                    // Use the resolve surface state
+                    SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
+                    uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),
+                        pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex,
+                        0, pResolveSurface->lod, pResolveSurface);
+                    {
+                        ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);
+                    }
+                }
+            }
+        }
+    }
+
 };

 template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
            pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
        }

+        // Save original for pSrcHotTile resolve.
+        uint8_t *pResolveSrcHotTile = pSrcHotTile;
+
        // Store each raster tile from the hot tile to the destination surface.
        for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
        {
@@ -2328,6 +2389,20 @@ struct StoreMacroTile
                }
            }
        }
+
+        if (pDstSurface->pAuxBaseAddress)
+        {
+            uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
+            // Resolve each raster tile from the hot tile into the resolve surface.
+            for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
+            {
+                for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
+                {
+                    StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);
+                    pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;
+                }
+            }
+        }
    }
 };

--- a/src/gallium/drivers/swr/swr_context.cpp
+++ b/src/gallium/drivers/swr/swr_context.cpp
@@ -267,65 +267,6 @@ swr_resource_copy(struct pipe_context *pipe,
 }


-/* XXX: This resolve is incomplete and suboptimal. It will be removed once the
- * pipelined resolve blit works. */
-void
-swr_do_msaa_resolve(struct pipe_resource *src_resource,
-                    struct pipe_resource *dst_resource)
-{
-   /* This is a pretty dumb inline resolve.  It only supports 8-bit formats
-    * (ex RGBA8/BGRA8) - which are most common display formats anyway.
-    */
-
-   /* quick check for 8-bit and number of components */
-   uint8_t bits_per_component =
-      util_format_get_component_bits(src_resource->format,
-            UTIL_FORMAT_COLORSPACE_RGB, 0);
-
-   /* Unsupported resolve format */
-   assert(src_resource->format == dst_resource->format);
-   assert(bits_per_component == 8);
-   if ((src_resource->format != dst_resource->format) ||
-       (bits_per_component != 8)) {
-      return;
-   }
-
-   uint8_t src_num_comps = util_format_get_nr_components(src_resource->format);
-
-   SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr;
-   SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr;
-
-   uint32_t *src, *dst, offset;
-   uint32_t num_samples = src_surface->numSamples;
-   float recip_num_samples = 1.0f / num_samples;
-   for (uint32_t y = 0; y < src_surface->height; y++) {
-      for (uint32_t x = 0; x < src_surface->width; x++) {
-         float r = 0.0f;
-         float g = 0.0f;
-         float b = 0.0f;
-         float a = 0.0f;
-         for (uint32_t sampleNum = 0;  sampleNum < num_samples; sampleNum++) {
-            offset = ComputeSurfaceOffset<false>(x, y, 0, 0, sampleNum, 0, src_surface);
-            src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps;
-            const uint32_t sample = *src;
-            r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples;
-            g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples;
-            b += (float)((sample >>  8) & 0xff) / 255.0f * recip_num_samples;
-            a += (float)((sample      ) & 0xff) / 255.0f * recip_num_samples;
-         }
-         uint32_t result = 0;
-         result  = ((uint8_t)(r * 255.0f) & 0xff) << 24;
-         result |= ((uint8_t)(g * 255.0f) & 0xff) << 16;
-         result |= ((uint8_t)(b * 255.0f) & 0xff) <<  8;
-         result |= ((uint8_t)(a * 255.0f) & 0xff);
-         offset = ComputeSurfaceOffset<false>(x, y, 0, 0, 0, 0, src_surface);
-         dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps;
-         *dst = result;
-      }
-   }
-}
-
-
 static void
 swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
 {
@@ -342,28 +283,14 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
      debug_printf("swr_blit: color resolve : %d -> %d\n",
            info.src.resource->nr_samples, info.dst.resource->nr_samples);

-      /* Because the resolve is being done inline (not pipelined),
-       * resources need to be stored out of hottiles and the pipeline empty.
-       *
-       * Resources are marked unused following fence finish because all
-       * pipeline operations are complete.  Validation of the blit will mark
-       * them are read/write again.
-       */
+      /* Resolve is done as part of the surface store. */
      swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
-      swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED);
-      swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0);
-      swr_resource_unused(info.src.resource);
-      swr_resource_unused(info.dst.resource);

      struct pipe_resource *src_resource = info.src.resource;
      struct pipe_resource *resolve_target =
         swr_resource(src_resource)->resolve_target;

-      /* Inline resolve samples into resolve target resource, then continue
-       * the blit. */
-      swr_do_msaa_resolve(src_resource, resolve_target);
-
-      /* The resolve target becomes the new source for the blit.  */
+      /* The resolve target becomes the new source for the blit. */
      info.src.resource = resolve_target;
   }

--- a/src/gallium/drivers/swr/swr_screen.cpp
+++ b/src/gallium/drivers/swr/swr_screen.cpp
@@ -891,6 +891,10 @@ swr_create_resolve_resource(struct pipe_screen *_screen,

      /* Attach it to the multisample resource */
      msaa_res->resolve_target = alt;
+
+      /* Hang resolve surface state off the multisample surface state so
+       * StoreTiles knows where to resolve the surface. */
+      msaa_res->swr.pAuxBaseAddress =  (uint8_t *)&swr_resource(alt)->swr;
   }

   return true; /* success */
@@ -1009,14 +1013,10 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
      SwrEndFrame(swr_context(pipe)->swrContext);
   }

-   /* Multisample surfaces need to be resolved before present */
+   /* Multisample resolved into resolve_target at flush with store_resource */
   if (pipe && spr->swr.numSamples > 1) {
      struct pipe_resource *resolve_target = spr->resolve_target;

-      /* Do an inline surface resolve into the resolve target resource
-       * XXX: This works, just not optimal. Work on using a pipelined blit. */
-      swr_do_msaa_resolve(resource, resolve_target);
-
      /* Once resolved, copy into display target */
      SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;

--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -165,7 +165,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
            prsc->width0 == box->width &&
            prsc->height0 == box->height &&
            prsc->depth0 == box->depth &&
-            prsc->array_size == 1) {
+            prsc->array_size == 1 &&
+            rsc->bo->private) {
                usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
        }

--- a/src/gallium/state_trackers/omx/Makefile.am
+++ b/src/gallium/state_trackers/omx/Makefile.am
@@ -27,6 +27,7 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(VL_CFLAGS) \
+	$(XCB_DRI3_CFLAGS) \
 	$(OMX_CFLAGS)

 noinst_LTLIBRARIES = libomxtracker.la
--- a/src/gallium/state_trackers/omx/entrypoint.c
+++ b/src/gallium/state_trackers/omx/entrypoint.c
@@ -35,7 +35,13 @@
 #include <string.h>
 #include <stdbool.h>

+#if defined(HAVE_X11_PLATFORM)
 #include <X11/Xlib.h>
+#else
+#define XOpenDisplay(x) NULL
+#define XCloseDisplay(x)
+#define Display void
+#endif

 #include "os/os_thread.h"
 #include "util/u_memory.h"
--- a/src/gallium/state_trackers/omx/vid_dec.h
+++ b/src/gallium/state_trackers/omx/vid_dec.h
@@ -34,8 +34,6 @@
 #ifndef OMX_VID_DEC_H
 #define OMX_VID_DEC_H

-#include <X11/Xlib.h>
-
 #include <string.h>

 #include <OMX_Types.h>
--- a/src/gallium/state_trackers/va/Makefile.am
+++ b/src/gallium/state_trackers/va/Makefile.am
@@ -27,6 +27,7 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(VL_CFLAGS) \
+	$(XCB_DRI3_CFLAGS) \
 	$(VA_CFLAGS) \
 	-DVA_DRIVER_INIT_FUNC="__vaDriverInit_$(VA_MAJOR)_$(VA_MINOR)"

--- a/src/gallium/state_trackers/va/context.c
+++ b/src/gallium/state_trackers/va/context.c
@@ -118,9 +118,7 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
      return VA_STATUS_ERROR_UNIMPLEMENTED;
   case VA_DISPLAY_GLX:
   case VA_DISPLAY_X11:
-#if defined(HAVE_DRI3)
      drv->vscreen = vl_dri3_screen_create(ctx->native_dpy, ctx->x11_screen);
-#endif
      if (!drv->vscreen)
         drv->vscreen = vl_dri2_screen_create(ctx->native_dpy, ctx->x11_screen);
      if (!drv->vscreen)
@@ -139,8 +137,8 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
      drv->vscreen = vl_drm_screen_create(drm_info->fd);
      if (!drv->vscreen)
         goto error_screen;
-      }
      break;
+   }
   default:
      FREE(drv);
      return VA_STATUS_ERROR_INVALID_DISPLAY;
--- a/src/gallium/state_trackers/vdpau/Makefile.am
+++ b/src/gallium/state_trackers/vdpau/Makefile.am
@@ -30,6 +30,7 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(VL_CFLAGS) \
+	$(XCB_DRI3_CFLAGS) \
 	$(VDPAU_CFLAGS)
 AM_CPPFLAGS = \
 	-I$(top_srcdir)/include \
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -63,9 +63,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,

   pipe_reference_init(&dev->reference, 1);

-#if defined(HAVE_DRI3)
   dev->vscreen = vl_dri3_screen_create(display, screen);
-#endif
   if (!dev->vscreen)
      dev->vscreen = vl_dri2_screen_create(display, screen);
   if (!dev->vscreen) {
--- a/src/gallium/state_trackers/xvmc/Makefile.am
+++ b/src/gallium/state_trackers/xvmc/Makefile.am
@@ -27,6 +27,7 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(VL_CFLAGS) \
+	$(XCB_DRI3_CFLAGS) \
 	$(XVMC_CFLAGS)

 noinst_LTLIBRARIES = libxvmctracker.la
@@ -45,7 +46,7 @@ noinst_PROGRAMS = \

 noinst_HEADERS = tests/testlib.h

-TEST_LIBS = $(XVMC_LIBS) -lXvMCW $(VL_LIBS)
+TEST_LIBS = $(XVMC_LIBS) -lXvMCW $(VL_LIBS) $(XCB_DRI3_LIBS)
 tests_test_context_SOURCES = tests/test_context.c tests/testlib.c
 tests_test_context_LDADD = $(TEST_LIBS)
 tests_test_surface_SOURCES = tests/test_surface.c tests/testlib.c
--- a/src/gallium/targets/dri/Android.mk
+++ b/src/gallium/targets/dri/Android.mk
@@ -35,7 +35,8 @@ LOCAL_CFLAGS :=
 LOCAL_SHARED_LIBRARIES := \
 	libdl \
 	libglapi \
-	libexpat
+	libexpat \
+	libz

 ifneq ($(filter freedreno,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DGALLIUM_FREEDRENO
--- a/src/gallium/targets/omx/Makefile.am
+++ b/src/gallium/targets/omx/Makefile.am
@@ -29,10 +29,15 @@ libomx_mesa_la_LIBADD = \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(OMX_LIBS) \
-	$(VL_LIBS) \
 	$(LIBDRM_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)

+if HAVE_PLATFORM_X11
+libomx_mesa_la_LIBADD += \
+	$(VL_LIBS) \
+	$(XCB_DRI3_LIBS)
+endif
+
 EXTRA_libomx_mesa_la_DEPENDENCIES = omx.sym
 EXTRA_DIST = omx.sym

--- a/src/gallium/targets/va/Makefile.am
+++ b/src/gallium/targets/va/Makefile.am
@@ -29,10 +29,15 @@ gallium_drv_video_la_LIBADD = \
 	$(top_builddir)/src/gallium/auxiliary/libgalliumvl.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
-	$(VL_LIBS) \
 	$(LIBDRM_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)

+if HAVE_PLATFORM_X11
+gallium_drv_video_la_LIBADD += \
+	$(VL_LIBS) \
+	$(XCB_DRI3_LIBS)
+endif
+
 EXTRA_gallium_drv_video_la_DEPENDENCIES = va.sym
 EXTRA_DIST = va.sym

--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -35,6 +35,7 @@ libvdpau_gallium_la_LIBADD = \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(VL_LIBS) \
+	$(XCB_DRI3_LIBS) \
 	$(LIBDRM_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)

--- a/src/gallium/targets/xvmc/Makefile.am
+++ b/src/gallium/targets/xvmc/Makefile.am
@@ -30,6 +30,7 @@ libXvMCgallium_la_LIBADD = \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(XVMC_LIBS) \
 	$(VL_LIBS) \
+	$(XCB_DRI3_LIBS) \
 	$(LIBDRM_LIBS) \
 	$(GALLIUM_COMMON_LIB_DEPS)

--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -315,6 +315,10 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
      ws->family = FAMILY_AI;
      ws->rev_id = AI_VEGA10_P_A0;
      break;
+   case CHIP_RAVEN:
+      ws->family = FAMILY_RV;
+      ws->rev_id = RAVEN_A0;
+      break;
   default:
      fprintf(stderr, "amdgpu: Unknown family.\n");
      goto fail;
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -258,7 +258,12 @@ virgl_bo_transfer_put(struct virgl_winsys *vws,

   memset(&tohostcmd, 0, sizeof(tohostcmd));
   tohostcmd.bo_handle = res->bo_handle;
-   tohostcmd.box = *(struct drm_virtgpu_3d_box *)box;
+   tohostcmd.box.x = box->x;
+   tohostcmd.box.y = box->y;
+   tohostcmd.box.z = box->z;
+   tohostcmd.box.w = box->width;
+   tohostcmd.box.h = box->height;
+   tohostcmd.box.d = box->depth;
   tohostcmd.offset = buf_offset;
   tohostcmd.level = level;
  // tohostcmd.stride = stride;
@@ -282,7 +287,12 @@ virgl_bo_transfer_get(struct virgl_winsys *vws,
   fromhostcmd.offset = buf_offset;
  // fromhostcmd.stride = stride;
  // fromhostcmd.layer_stride = layer_stride;
-   fromhostcmd.box = *(struct drm_virtgpu_3d_box *)box;
+   fromhostcmd.box.x = box->x;
+   fromhostcmd.box.y = box->y;
+   fromhostcmd.box.z = box->z;
+   fromhostcmd.box.w = box->width;
+   fromhostcmd.box.h = box->height;
+   fromhostcmd.box.d = box->depth;
   return drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST, &fromhostcmd);
 }

--- a/src/gbm/Makefile.am
+++ b/src/gbm/Makefile.am
@@ -28,7 +28,6 @@ libgbm_la_LIBADD = \
 	$(DLOPEN_LIBS)

 if HAVE_PLATFORM_WAYLAND
-AM_CPPFLAGS = -DHAVE_WAYLAND_PLATFORM
 AM_CFLAGS += $(WAYLAND_CFLAGS)
 libgbm_la_LIBADD += $(top_builddir)/src/egl/wayland/wayland-drm/libwayland-drm.la $(WAYLAND_LIBS)
 endif
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -772,7 +772,7 @@ gbm_dri_bo_get_modifier(struct gbm_bo *_bo)
                               &mod))
      return DRM_FORMAT_MOD_INVALID;

-   ret |= mod;
+   ret |= (uint64_t)(mod & 0xffffffff);

   return ret;
 }
--- a/src/glx/g_glxglvnddispatchfuncs.c
+++ b/src/glx/g_glxglvnddispatchfuncs.c
@@ -4,6 +4,7 @@
 */
 #include <stdlib.h>

+#include "glxclient.h"
 #include "glxglvnd.h"
 #include "glxglvnddispatchfuncs.h"
 #include "g_glxglvnddispatchindices.h"
@@ -50,6 +51,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
    __ATTRIB(GetCurrentDisplayEXT),
    // glXGetCurrentDrawable implemented by libglvnd
    // glXGetCurrentReadDrawable implemented by libglvnd
+    __ATTRIB(GetDriverConfig),
    // glXGetFBConfigAttrib implemented by libglvnd
    __ATTRIB(GetFBConfigAttribSGIX),
    __ATTRIB(GetFBConfigFromVisualSGIX),
@@ -334,6 +336,17 @@ static Display *dispatch_GetCurrentDisplayEXT(void)



+static const char *dispatch_GetDriverConfig(const char *driverName)
+{
+    /*
+     * The options are constant for a given driverName, so we do not need
+     * a context (and apps expect to be able to call this without one).
+     */
+    return glXGetDriverConfig(driverName);
+}
+
+
+
 static int dispatch_GetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config,
                                          int attribute, int *value_return)
 {
@@ -939,6 +952,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
    __ATTRIB(DestroyGLXPbufferSGIX),
    __ATTRIB(GetContextIDEXT),
    __ATTRIB(GetCurrentDisplayEXT),
+    __ATTRIB(GetDriverConfig),
    __ATTRIB(GetFBConfigAttribSGIX),
    __ATTRIB(GetFBConfigFromVisualSGIX),
    __ATTRIB(GetMscRateOML),
--- a/src/glx/g_glxglvnddispatchindices.h
+++ b/src/glx/g_glxglvnddispatchindices.h
@@ -39,6 +39,7 @@ typedef enum __GLXdispatchIndex {
    DI_GetCurrentDisplayEXT,
    // GetCurrentDrawable implemented by libglvnd
    // GetCurrentReadDrawable implemented by libglvnd
+    DI_GetDriverConfig,
    // GetFBConfigAttrib implemented by libglvnd
    DI_GetFBConfigAttribSGIX,
    DI_GetFBConfigFromVisualSGIX,
--- a/src/intel/Android.vulkan.mk
+++ b/src/intel/Android.vulkan.mk
@@ -231,7 +231,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
 	libmesa_intel_compiler \
 	libmesa_anv_entrypoints

-LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_SHARED_LIBRARIES := libdrm libz

 include $(MESA_COMMON_MK)
 include $(BUILD_SHARED_LIBRARY)
--- a/src/intel/Makefile.vulkan.am
+++ b/src/intel/Makefile.vulkan.am
@@ -111,7 +111,21 @@ VULKAN_SOURCES = \
 	$(VULKAN_GENERATED_FILES) \
 	$(VULKAN_FILES)

-VULKAN_LIB_DEPS = $(LIBDRM_LIBS)
+VULKAN_LIB_DEPS = \
+	vulkan/libvulkan_common.la \
+	$(VULKAN_PER_GEN_LIBS) \
+	compiler/libintel_compiler.la \
+	common/libintel_common.la \
+	isl/libisl.la \
+	blorp/libblorp.la \
+	$(top_builddir)/src/vulkan/libvulkan_util.la \
+	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
+	$(top_builddir)/src/compiler/nir/libnir.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(LIBDRM_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	-lm

 if HAVE_PLATFORM_X11
 VULKAN_CPPFLAGS += \
@@ -121,8 +135,7 @@ VULKAN_CPPFLAGS += \

 VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)

-# FIXME: Use pkg-config for X11-xcb ldflags.
-VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
+VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS)
 endif


@@ -141,21 +154,6 @@ vulkan_libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)
 vulkan_libvulkan_common_la_CFLAGS = $(VULKAN_CFLAGS)
 vulkan_libvulkan_common_la_CPPFLAGS = $(VULKAN_CPPFLAGS)

-VULKAN_LIB_DEPS += \
-	vulkan/libvulkan_common.la \
-	$(VULKAN_PER_GEN_LIBS) \
-	compiler/libintel_compiler.la \
-	common/libintel_common.la \
-	isl/libisl.la \
-	blorp/libblorp.la \
-	$(top_builddir)/src/vulkan/libvulkan_util.la \
-	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
-	$(top_builddir)/src/compiler/nir/libnir.la \
-	$(top_builddir)/src/util/libmesautil.la \
-	$(PTHREAD_LIBS) \
-	$(DLOPEN_LIBS) \
-	-lm
-
 nodist_EXTRA_vulkan_libvulkan_intel_la_SOURCES = dummy.cpp
 vulkan_libvulkan_intel_la_SOURCES = $(VULKAN_GEM_FILES)
 vulkan_libvulkan_intel_la_LIBADD = $(VULKAN_LIB_DEPS)
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -583,16 +583,46 @@ vec4_visitor::split_uniform_registers()
   }
 }

+/* This function returns the register number where we placed the uniform */
+static int
+set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
+                      const int src, const int size, const int channel_size,
+                      int *new_loc, int *new_chan,
+                      int *new_chans_used)
+{
+   int dst;
+   /* Find the lowest place we can slot this uniform in. */
+   for (dst = 0; dst < nr_uniforms; dst++) {
+      if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
+         break;
+   }
+
+   assert(dst < nr_uniforms);
+
+   new_loc[src] = dst;
+   new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
+   new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
+
+   *new_uniform_count = MAX2(*new_uniform_count, dst + 1);
+   return dst;
+}
+
 void
 vec4_visitor::pack_uniform_registers()
 {
   uint8_t chans_used[this->uniforms];
   int new_loc[this->uniforms];
   int new_chan[this->uniforms];
+   bool is_aligned_to_dvec4[this->uniforms];
+   int new_chans_used[this->uniforms];
+   int channel_sizes[this->uniforms];

   memset(chans_used, 0, sizeof(chans_used));
   memset(new_loc, 0, sizeof(new_loc));
   memset(new_chan, 0, sizeof(new_chan));
+   memset(new_chans_used, 0, sizeof(new_chans_used));
+   memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
+   memset(channel_sizes, 0, sizeof(channel_sizes));

   /* Find which uniform vectors are actually used by the program.  We
    * expect unused vector elements when we've moved array access out
@@ -622,7 +652,7 @@ vec4_visitor::pack_uniform_registers()
            continue;

         assert(type_sz(inst->src[i].type) % 4 == 0);
-         unsigned channel_size = type_sz(inst->src[i].type) / 4;
+         int channel_size = type_sz(inst->src[i].type) / 4;

         int reg = inst->src[i].nr;
         for (int c = 0; c < 4; c++) {
@@ -631,10 +661,15 @@ vec4_visitor::pack_uniform_registers()

            unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
            unsigned used = MAX2(chans_used[reg], channel * channel_size);
-            if (used <= 4)
+            if (used <= 4) {
               chans_used[reg] = used;
-            else
+               channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
+            } else {
+               is_aligned_to_dvec4[reg] = true;
+               is_aligned_to_dvec4[reg + 1] = true;
               chans_used[reg + 1] = used - 4;
+               channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
+            }
         }
      }

@@ -659,42 +694,60 @@ vec4_visitor::pack_uniform_registers()

   int new_uniform_count = 0;

+   /* As the uniforms are going to be reordered, take the data from a temporary
+    * copy of the original param[].
+    */
+   gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
+                                            stage_prog_data->nr_params);
+   memcpy(param, stage_prog_data->param,
+          sizeof(gl_constant_value*) * stage_prog_data->nr_params);
+
   /* Now, figure out a packing of the live uniform vectors into our
-    * push constants.
+    * push constants. Start with dvec{3,4} because they are aligned to
+    * dvec4 size (2 vec4).
    */
   for (int src = 0; src < uniforms; src++) {
      int size = chans_used[src];

-      if (size == 0)
+      if (size == 0 || !is_aligned_to_dvec4[src])
         continue;

-      int dst;
-      /* Find the lowest place we can slot this uniform in. */
-      for (dst = 0; dst < src; dst++) {
-         if (chans_used[dst] + size <= 4)
-            break;
+      /* dvec3 are aligned to dvec4 size, apply the alignment of the size
+       * to 4 to avoid moving last component of a dvec3 to the available
+       * location at the end of a previous dvec3. These available locations
+       * could be filled by smaller variables in next loop.
+       */
+      size = ALIGN(size, 4);
+      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
+                                      src, size, channel_sizes[src],
+                                      new_loc, new_chan,
+                                      new_chans_used);
+      /* Move the references to the data */
+      for (int j = 0; j < size; j++) {
+         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+            param[src * 4 + j];
      }
-
-      if (src == dst) {
-         new_loc[src] = dst;
-         new_chan[src] = 0;
-      } else {
-         new_loc[src] = dst;
-         new_chan[src] = chans_used[dst];
-
-         /* Move the references to the data */
-         for (int j = 0; j < size; j++) {
-            stage_prog_data->param[dst * 4 + new_chan[src] + j] =
-               stage_prog_data->param[src * 4 + j];
-         }
-
-         chans_used[dst] += size;
-         chans_used[src] = 0;
-      }
-
-      new_uniform_count = MAX2(new_uniform_count, dst + 1);
   }

+   /* Continue with the rest of data, which is aligned to vec4. */
+   for (int src = 0; src < uniforms; src++) {
+      int size = chans_used[src];
+
+      if (size == 0 || is_aligned_to_dvec4[src])
+         continue;
+
+      int dst = set_push_constant_loc(uniforms, &new_uniform_count,
+                                      src, size, channel_sizes[src],
+                                      new_loc, new_chan,
+                                      new_chans_used);
+      /* Move the references to the data */
+      for (int j = 0; j < size; j++) {
+         stage_prog_data->param[dst * 4 + new_chan[src] + j] =
+            param[src * 4 + j];
+      }
+   }
+
+   ralloc_free(param);
   this->uniforms = new_uniform_count;

   /* Now, update the instructions for our repacked uniforms. */
@@ -705,9 +758,9 @@ vec4_visitor::pack_uniform_registers()
         if (inst->src[i].file != UNIFORM)
            continue;

+         int chan = new_chan[src] / channel_sizes[src];
         inst->src[i].nr = new_loc[src];
-         inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
-                                              new_chan[src], new_chan[src]);
+         inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
      }
   }
 }
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -868,10 +868,36 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,

         vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
                           mem_ctx, true /* no_spills */, shader_time_index);
+
+         /* Backup 'nr_params' and 'param' as they can be modified by the
+          * DUAL_OBJECT visitor. If it fails, we will run the fallback
+          * (DUAL_INSTANCED or SINGLE mode) and we need to restore original
+          * values.
+          */
+         const unsigned param_count = prog_data->base.base.nr_params;
+         gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
+                                                  param_count);
+         memcpy(param, prog_data->base.base.param,
+                sizeof(gl_constant_value*) * param_count);
+
         if (v.run()) {
+            /* Success! Backup is not needed */
+            ralloc_free(param);
            return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
                                              shader, &prog_data->base, v.cfg,
                                              final_assembly_size);
+         } else {
+            /* These variables could be modified by the execution of the GS
+             * visitor if it packed the uniforms in the push constant buffer.
+             * As it failed, we need to restore them so we can start again with
+             * DUAL_INSTANCED or SINGLE mode.
+             *
+             * FIXME: Could more variables be modified by this execution?
+             */
+            memcpy(prog_data->base.base.param, param,
+                   sizeof(gl_constant_value*) * param_count);
+            prog_data->base.base.nr_params = param_count;
+            ralloc_free(param);
         }
      }
   }
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -852,7 +852,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
       * The swizzle also works in the indirect case as the generator adds
       * the swizzle to the offset for us.
       */
-      unsigned shift = (nir_intrinsic_base(instr) % 16) / 4;
+      const int type_size = type_sz(src.type);
+      unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size;
      assert(shift + instr->num_components <= 4);

      nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
@@ -860,14 +861,20 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
         /* Offsets are in bytes but they should always be multiples of 4 */
         assert(const_offset->u32[0] % 4 == 0);

-         unsigned offset = const_offset->u32[0] + shift * 4;
+         src.swizzle = brw_swizzle_for_size(instr->num_components);
+         dest.writemask = brw_writemask_for_size(instr->num_components);
+         unsigned offset = const_offset->u32[0] + shift * type_size;
         src.offset = ROUND_DOWN_TO(offset, 16);
-         shift = (offset % 16) / 4;
+         shift = (offset % 16) / type_size;
+         assert(shift + instr->num_components <= 4);
         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);

         emit(MOV(dest, src));
      } else {
-         src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
+         /* Uniform arrays are vec4 aligned, because of std140 alignment
+          * rules.
+          */
+         assert(shift == 0);

         src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

--- a/src/intel/isl/isl_gen7.c
+++ b/src/intel/isl/isl_gen7.c
@@ -352,30 +352,12 @@ gen7_choose_valign_el(const struct isl_device *dev,
   if (isl_surf_usage_is_stencil(info->usage)) {
      /* The Ivybridge PRM states that the stencil buffer's vertical alignment
       * is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
-       * Unit Size]. However, valign=8 is outside the set of valid values of
-       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
-       * (0x0) and VALIGN_4 (0x1).
-       *
-       * The PRM is generally confused about the width, height, and alignment
-       * of the stencil buffer; and this confusion appears elsewhere. For
-       * example, the following PRM text effectively converts the stencil
-       * buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
-       * Volume 1, Part 1, Section
-       * 6.18.4.2 Base Address and LOD Calculation]:
-       *
-       *    For separate stencil buffer, the width must be mutiplied by 2 and
-       *    height divided by 2 as follows:
-       *
-       *       w_L = 2*i*ceil(W_L/i)
-       *       h_L = 1/2*j*ceil(H_L/j)
-       *
-       * The root of the confusion is that, in W tiling, each pair of rows is
-       * interleaved into one.
-       *
-       * FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
-       * is more polished.
+       * Unit Size]. valign=8 is outside the set of valid values of
+       * RENDER_SURFACE_STATE.SurfaceVerticalAlignment, but that's ok because
+       * a stencil buffer will never be used directly for texturing or
+       * rendering on gen7.
       */
-      require_valign4 = true;
+      return 8;
   }

   assert(!require_valign2 || !require_valign4);
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -504,6 +504,9 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
   anv_bo_init(&pool->bo, gem_handle, size);
   pool->bo.map = map;

+   if (pool->device->instance->physicalDevice.has_exec_async)
+      pool->bo.flags |= EXEC_OBJECT_ASYNC;
+
 done:
   pthread_mutex_unlock(&pool->device->mutex);

@@ -881,6 +884,12 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
   if (result != VK_SUCCESS)
      return result;

+   if (pool->device->instance->physicalDevice.supports_48bit_addresses)
+      new_bo.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+   if (pool->device->instance->physicalDevice.has_exec_async)
+      new_bo.flags |= EXEC_OBJECT_ASYNC;
+
   assert(new_bo.size == pow2_size);

   new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
@@ -1010,7 +1019,10 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
    *
    * so nothing will ever touch the top page.
    */
-   bo->bo.flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+   assert(!(bo->bo.flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS));
+
+   if (device->instance->physicalDevice.has_exec_async)
+      bo->bo.flags |= EXEC_OBJECT_ASYNC;

   /* Set the exists last because it may be read by other threads */
   __sync_synchronize();
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1373,6 +1373,73 @@ void anv_CmdResolveImage(
   blorp_batch_finish(&batch);
 }

+void
+anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
+                    const struct anv_image *image,
+                    const struct isl_view *view,
+                    const VkImageSubresourceRange *subresourceRange)
+{
+   assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1);
+
+   struct blorp_batch batch;
+   blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
+
+   struct blorp_surf surf;
+   get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
+                                image->aux_usage, &surf);
+
+   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
+    *
+    *    "After Render target fast clear, pipe-control with color cache
+    *    write-flush must be issued before sending any DRAW commands on
+    *    that render target."
+    *
+    * This comment is a bit cryptic and doesn't really tell you what's going on
+    * or what's really needed.  It appears that fast clear ops are not
+    * properly synchronized with other drawing.  This means that we cannot
+    * have a fast clear operation in the pipe at the same time as other
+    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
+    * that the contents of the previous draw hit the render target before we
+    * fast clear and then use a second PIPE_CONTROL after the fast clear to
+    * ensure that it is completed before any additional drawing occurs.
+    */
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+
+   const uint32_t level_count =
+      view ? view->levels : anv_get_levelCount(image, subresourceRange);
+   for (uint32_t l = 0; l < level_count; l++) {
+      const uint32_t level =
+         (view ? view->base_level : subresourceRange->baseMipLevel) + l;
+
+      const VkExtent3D extent = {
+         .width = anv_minify(image->extent.width, level),
+         .height = anv_minify(image->extent.height, level),
+         .depth = anv_minify(image->extent.depth, level),
+      };
+
+      /* Blorp likes to treat 2D_ARRAY and 3D the same. */
+      uint32_t blorp_base_layer, blorp_layer_count;
+      if (view) {
+         blorp_base_layer = view->base_array_layer;
+         blorp_layer_count = view->array_len;
+      } else if (image->type == VK_IMAGE_TYPE_3D) {
+         blorp_base_layer = 0;
+         blorp_layer_count = extent.depth;
+      } else {
+         blorp_base_layer = subresourceRange->baseArrayLayer;
+         blorp_layer_count = anv_get_layerCount(image, subresourceRange);
+      }
+
+      blorp_fast_clear(&batch, &surf, surf.surf->format,
+                       level, blorp_base_layer, blorp_layer_count,
+                       0, 0, extent.width, extent.height);
+   }
+
+   cmd_buffer->state.pending_pipe_bits |=
+      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
+}
+
 static void
 ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -97,6 +97,113 @@ anv_compute_heap_size(int fd, uint64_t *heap_size)
   return VK_SUCCESS;
 }

+static VkResult
+anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
+{
+   /* The kernel query only tells us whether or not the kernel supports the
+    * EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and not whether or not the
+    * hardware has actual 48bit address support.
+    */
+   device->supports_48bit_addresses =
+      (device->info.gen >= 8) && anv_gem_supports_48b_addresses(fd);
+
+   uint64_t heap_size;
+   VkResult result = anv_compute_heap_size(fd, &heap_size);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (heap_size <= 3ull * (1ull << 30)) {
+      /* In this case, everything fits nicely into the 32-bit address space,
+       * so there's no need for supporting 48bit addresses on client-allocated
+       * memory objects.
+       */
+      device->memory.heap_count = 1;
+      device->memory.heaps[0] = (struct anv_memory_heap) {
+         .size = heap_size,
+         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+         .supports_48bit_addresses = false,
+      };
+   } else {
+      /* Not everything will fit nicely into a 32-bit address space.  In this
+       * case we need a 64-bit heap.  Advertise a small 32-bit heap and a
+       * larger 48-bit heap.  If we're in this case, then we have a total heap
+       * size larger than 3GiB which most likely means they have 8 GiB of
+       * video memory and so carving off 1 GiB for the 32-bit heap should be
+       * reasonable.
+       */
+      const uint64_t heap_size_32bit = 1ull << 30;
+      const uint64_t heap_size_48bit = heap_size - heap_size_32bit;
+
+      assert(device->supports_48bit_addresses);
+
+      device->memory.heap_count = 2;
+      device->memory.heaps[0] = (struct anv_memory_heap) {
+         .size = heap_size_48bit,
+         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+         .supports_48bit_addresses = true,
+      };
+      device->memory.heaps[1] = (struct anv_memory_heap) {
+         .size = heap_size_32bit,
+         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+         .supports_48bit_addresses = false,
+      };
+   }
+
+   uint32_t type_count = 0;
+   for (uint32_t heap = 0; heap < device->memory.heap_count; heap++) {
+      uint32_t valid_buffer_usage = ~0;
+
+      /* There appears to be a hardware issue in the VF cache where it only
+       * considers the bottom 32 bits of memory addresses.  If you happen to
+       * have two vertex buffers which get placed exactly 4 GiB apart and use
+       * them in back-to-back draw calls, you can get collisions.  In order to
+       * solve this problem, we require vertex and index buffers be bound to
+       * memory allocated out of the 32-bit heap.
+       */
+      if (device->memory.heaps[heap].supports_48bit_addresses) {
+         valid_buffer_usage &= ~(VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
+                                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
+      }
+
+      if (device->info.has_llc) {
+         /* Big core GPUs share LLC with the CPU and thus one memory type can be
+          * both cached and coherent at the same time.
+          */
+         device->memory.types[type_count++] = (struct anv_memory_type) {
+            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+            .heapIndex = heap,
+            .valid_buffer_usage = valid_buffer_usage,
+         };
+      } else {
+         /* The spec requires that we expose a host-visible, coherent memory
+          * type, but Atom GPUs don't share LLC. Thus we offer two memory types
+          * to give the application a choice between cached, but not coherent and
+          * coherent but uncached (WC though).
+          */
+         device->memory.types[type_count++] = (struct anv_memory_type) {
+            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+            .heapIndex = heap,
+            .valid_buffer_usage = valid_buffer_usage,
+         };
+         device->memory.types[type_count++] = (struct anv_memory_type) {
+            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                             VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+            .heapIndex = heap,
+            .valid_buffer_usage = valid_buffer_usage,
+         };
+      }
+   }
+   device->memory.type_count = type_count;
+
+   return VK_SUCCESS;
+}
+
 static bool
 anv_device_get_cache_uuid(void *uuid, uint16_t pci_id)
 {
@@ -196,12 +303,12 @@ anv_physical_device_init(struct anv_physical_device *device,
      goto fail;
   }

-   device->supports_48bit_addresses = anv_gem_supports_48b_addresses(fd);
-
-   result = anv_compute_heap_size(fd, &device->heap_size);
+   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail;

+   device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
+
   if (!anv_device_get_cache_uuid(device->uuid, device->chipset_id)) {
      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                         "cannot generate UUID");
@@ -452,7 +559,7 @@ anv_enumerate_devices(struct anv_instance *instance)

   instance->physicalDeviceCount = 0;

-   max_devices = drmGetDevices2(0, devices, sizeof(devices));
+   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
   if (max_devices < 1)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

@@ -468,6 +575,7 @@ anv_enumerate_devices(struct anv_instance *instance)
            break;
      }
   }
+   drmFreeDevices(devices, max_devices);

   if (result == VK_SUCCESS)
      instance->physicalDeviceCount = 1;
@@ -787,44 +895,21 @@ void anv_GetPhysicalDeviceMemoryProperties(
 {
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

-   if (physical_device->info.has_llc) {
-      /* Big core GPUs share LLC with the CPU and thus one memory type can be
-       * both cached and coherent at the same time.
-       */
-      pMemoryProperties->memoryTypeCount = 1;
-      pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
-         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-         .heapIndex = 0,
-      };
-   } else {
-      /* The spec requires that we expose a host-visible, coherent memory
-       * type, but Atom GPUs don't share LLC. Thus we offer two memory types
-       * to give the application a choice between cached, but not coherent and
-       * coherent but uncached (WC though).
-       */
-      pMemoryProperties->memoryTypeCount = 2;
-      pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
-         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-         .heapIndex = 0,
-      };
-      pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
-         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-         .heapIndex = 0,
+   pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
+   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
+      pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
+         .propertyFlags = physical_device->memory.types[i].propertyFlags,
+         .heapIndex     = physical_device->memory.types[i].heapIndex,
      };
   }

-   pMemoryProperties->memoryHeapCount = 1;
-   pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
-      .size = physical_device->heap_size,
-      .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-   };
+   pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
+   for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
+      pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
+         .size    = physical_device->memory.heaps[i].size,
+         .flags   = physical_device->memory.heaps[i].flags,
+      };
+   }
 }

 void anv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -1524,9 +1609,6 @@ anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)

   anv_bo_init(bo, gem_handle, size);

-   if (device->instance->physicalDevice.supports_48bit_addresses)
-      bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
-
   return VK_SUCCESS;
 }

@@ -1537,6 +1619,7 @@ VkResult anv_AllocateMemory(
    VkDeviceMemory*                             pMem)
 {
   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct anv_device_memory *mem;
   VkResult result;

@@ -1545,10 +1628,6 @@ VkResult anv_AllocateMemory(
   /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
   assert(pAllocateInfo->allocationSize > 0);

-   /* We support exactly one memory heap. */
-   assert(pAllocateInfo->memoryTypeIndex == 0 ||
-          (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
-
   /* The kernel relocation API has a limitation of a 32-bit delta value
    * applied to the address before it is written which, in spite of it being
    * unsigned, is treated as signed .  Because of the way that this maps to
@@ -1583,11 +1662,19 @@ VkResult anv_AllocateMemory(
   if (result != VK_SUCCESS)
      goto fail;

-   mem->type_index = pAllocateInfo->memoryTypeIndex;
+   assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
+   mem->type = &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];

   mem->map = NULL;
   mem->map_size = 0;

+   assert(mem->type->heapIndex < pdevice->memory.heap_count);
+   if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses)
+      mem->bo.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+   if (pdevice->has_exec_async)
+      mem->bo.flags |= EXEC_OBJECT_ASYNC;
+
   *pMem = anv_device_memory_to_handle(mem);

   return VK_SUCCESS;
@@ -1657,7 +1744,9 @@ VkResult anv_MapMemory(
    * userspace. */

   uint32_t gem_flags = 0;
-   if (!device->info.has_llc && mem->type_index == 0)
+
+   if (!device->info.has_llc &&
+       (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
      gem_flags |= I915_MMAP_WC;

   /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
@@ -1754,6 +1843,7 @@ void anv_GetBufferMemoryRequirements(
 {
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   /* The Vulkan spec (git aaed022) says:
    *
@@ -1761,13 +1851,17 @@ void anv_GetBufferMemoryRequirements(
    *    supported memory type for the resource. The bit `1<<i` is set if and
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
-    *
-    * We support exactly one memory type on LLC, two on non-LLC.
    */
-   pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
+   uint32_t memory_types = 0;
+   for (uint32_t i = 0; i < pdevice->memory.type_count; i++) {
+      uint32_t valid_usage = pdevice->memory.types[i].valid_buffer_usage;
+      if ((valid_usage & buffer->usage) == buffer->usage)
+         memory_types |= (1u << i);
+   }

   pMemoryRequirements->size = buffer->size;
   pMemoryRequirements->alignment = 16;
+   pMemoryRequirements->memoryTypeBits = memory_types;
 }

 void anv_GetImageMemoryRequirements(
@@ -1777,6 +1871,7 @@ void anv_GetImageMemoryRequirements(
 {
   ANV_FROM_HANDLE(anv_image, image, _image);
   ANV_FROM_HANDLE(anv_device, device, _device);
+   struct anv_physical_device *pdevice = &device->instance->physicalDevice;

   /* The Vulkan spec (git aaed022) says:
    *
@@ -1785,12 +1880,13 @@ void anv_GetImageMemoryRequirements(
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
    *
-    * We support exactly one memory type on LLC, two on non-LLC.
+    * All types are currently supported for images.
    */
-   pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
+   uint32_t memory_types = (1ull << pdevice->memory.type_count) - 1;

   pMemoryRequirements->size = image->size;
   pMemoryRequirements->alignment = image->alignment;
+   pMemoryRequirements->memoryTypeBits = memory_types;
 }

 void anv_GetImageSparseMemoryRequirements(
@@ -1820,6 +1916,7 @@ VkResult anv_BindBufferMemory(
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   if (mem) {
+      assert((buffer->usage & mem->type->valid_buffer_usage) == buffer->usage);
      buffer->bo = &mem->bo;
      buffer->offset = memoryOffset;
   } else {
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -179,8 +179,8 @@ static const struct anv_format anv_formats[] = {
   fmt(VK_FORMAT_D24_UNORM_S8_UINT,       ISL_FORMAT_R24_UNORM_X8_TYPELESS),
   fmt(VK_FORMAT_D32_SFLOAT_S8_UINT,      ISL_FORMAT_R32_FLOAT),

-   fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_DXT1_RGB),
-   fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_DXT1_RGB_SRGB),
+   swiz_fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK,     ISL_FORMAT_BC1_UNORM, RGB1),
+   swiz_fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK,      ISL_FORMAT_BC1_UNORM_SRGB, RGB1),
   fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK,    ISL_FORMAT_BC1_UNORM),
   fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK,     ISL_FORMAT_BC1_UNORM_SRGB),
   fmt(VK_FORMAT_BC2_UNORM_BLOCK,         ISL_FORMAT_BC2_UNORM),
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -332,7 +332,6 @@ VkResult anv_BindImageMemory(
    VkDeviceMemory                              _memory,
    VkDeviceSize                                memoryOffset)
 {
-   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
   ANV_FROM_HANDLE(anv_image, image, _image);

@@ -345,33 +344,6 @@ VkResult anv_BindImageMemory(
   image->bo = &mem->bo;
   image->offset = memoryOffset;

-   if (image->aux_surface.isl.size > 0) {
-
-      /* The offset and size must be a multiple of 4K or else the
-       * anv_gem_mmap call below will fail.
-       */
-      assert((image->offset + image->aux_surface.offset) % 4096 == 0);
-      assert(image->aux_surface.isl.size % 4096 == 0);
-
-      /* Auxiliary surfaces need to have their memory cleared to 0 before they
-       * can be used.  For CCS surfaces, this puts them in the "resolved"
-       * state so they can be used with CCS enabled before we ever touch it
-       * from the GPU.  For HiZ, we need something valid or else we may get
-       * GPU hangs on some hardware and 0 works fine.
-       */
-      void *map = anv_gem_mmap(device, image->bo->gem_handle,
-                               image->offset + image->aux_surface.offset,
-                               image->aux_surface.isl.size,
-                               device->info.has_llc ? 0 : I915_MMAP_WC);
-
-      if (map == MAP_FAILED)
-         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-
-      memset(map, 0, image->aux_surface.isl.size);
-
-      anv_gem_munmap(map, image->aux_surface.isl.size);
-   }
-
   return VK_SUCCESS;
 }

@@ -423,12 +395,10 @@ void anv_GetImageSubresourceLayout(
 }

 /**
- * This function determines the optimal buffer to use for device
- * accesses given a VkImageLayout and other pieces of information needed to
- * make that determination. This does not determine the optimal buffer to
- * use during a resolve operation.
- *
- * NOTE: Some layouts do not support device access.
+ * This function determines the optimal buffer to use for a given
+ * VkImageLayout and other pieces of information needed to make that
+ * determination. This does not determine the optimal buffer to use
+ * during a resolve operation.
 *
 * @param devinfo The device information of the Intel GPU.
 * @param image The image that may contain a collection of buffers.
@@ -484,15 +454,19 @@ anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
   switch (layout) {

   /* Invalid Layouts */
+   case VK_IMAGE_LAYOUT_RANGE_SIZE:
+   case VK_IMAGE_LAYOUT_MAX_ENUM:
+      unreachable("Invalid image layout.");

-   /* According to the Vulkan Spec, the following layouts are valid only as
-    * initial layouts in a layout transition and don't support device access.
+   /* Undefined layouts
+    *
+    * The pre-initialized layout is equivalent to the undefined layout for
+    * optimally-tiled images.  We can only do color compression (CCS or HiZ)
+    * on tiled images.
    */
   case VK_IMAGE_LAYOUT_UNDEFINED:
   case VK_IMAGE_LAYOUT_PREINITIALIZED:
-   case VK_IMAGE_LAYOUT_RANGE_SIZE:
-   case VK_IMAGE_LAYOUT_MAX_ENUM:
-      unreachable("Invalid image layout for device access.");
+      return ISL_AUX_USAGE_NONE;


   /* Transfer Layouts
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -604,6 +604,24 @@ struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
                                      gl_shader_stage stage,
                                      unsigned per_thread_scratch);

+struct anv_memory_type {
+   /* Standard bits passed on to the client */
+   VkMemoryPropertyFlags   propertyFlags;
+   uint32_t                heapIndex;
+
+   /* Driver-internal book-keeping */
+   VkBufferUsageFlags      valid_buffer_usage;
+};
+
+struct anv_memory_heap {
+   /* Standard bits passed on to the client */
+   VkDeviceSize      size;
+   VkMemoryHeapFlags flags;
+
+   /* Driver-internal book-keeping */
+   bool              supports_48bit_addresses;
+};
+
 struct anv_physical_device {
    VK_LOADER_DATA                              _loader_data;

@@ -620,17 +638,28 @@ struct anv_physical_device {
     * practically unlimited.  However, we will never report more than 3/4 of
     * the total system ram to try and avoid running out of RAM.
     */
-    uint64_t                                    heap_size;
    bool                                        supports_48bit_addresses;
    struct brw_compiler *                       compiler;
    struct isl_device                           isl_dev;
    int                                         cmd_parser_version;
+    bool                                        has_exec_async;

    uint32_t                                    eu_total;
    uint32_t                                    subslice_total;

    uint8_t                                     uuid[VK_UUID_SIZE];

+    struct {
+      uint32_t                                  type_count;
+      struct anv_memory_type                    types[VK_MAX_MEMORY_TYPES];
+      uint32_t                                  heap_count;
+      struct anv_memory_heap                    heaps[VK_MAX_MEMORY_HEAPS];
+    } memory;
+
+    uint8_t                                     pipeline_cache_uuid[VK_UUID_SIZE];
+    uint8_t                                     driver_uuid[VK_UUID_SIZE];
+    uint8_t                                     device_uuid[VK_UUID_SIZE];
+
    struct wsi_device                       wsi_device;
    int                                         local_fd;
 };
@@ -960,7 +989,7 @@ _anv_combine_address(struct anv_batch *batch, void *location,

 struct anv_device_memory {
   struct anv_bo                                bo;
-   uint32_t                                     type_index;
+   struct anv_memory_type *                     type;
   VkDeviceSize                                 map_size;
   void *                                       map;
 };
@@ -1994,6 +2023,12 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
                        const struct anv_image *image,
                        enum blorp_hiz_op op);

+void
+anv_image_ccs_clear(struct anv_cmd_buffer *cmd_buffer,
+                    const struct anv_image *image,
+                    const struct isl_view *view,
+                    const VkImageSubresourceRange *subresourceRange);
+
 enum isl_aux_usage
 anv_layout_to_aux_usage(const struct gen_device_info * const devinfo,
                        const struct anv_image *image,
--- a/src/intel/vulkan/anv_wsi.c
+++ b/src/intel/vulkan/anv_wsi.c
@@ -208,6 +208,7 @@ x11_anv_wsi_image_create(VkDevice device_h,
    * know we're writing to them and synchronize uses on other rings (eg if
    * the display server uses the blitter ring).
    */
+   memory->bo.flags &= ~EXEC_OBJECT_ASYNC;
   memory->bo.flags |= EXEC_OBJECT_WRITE;

   anv_BindImageMemory(device_h, image_h, memory_h, 0);
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -343,15 +343,8 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
    * The undefined layout indicates that the user doesn't care about the data
    * that's currently in the buffer. Therefore, a data-preserving resolve
    * operation is not needed.
-    *
-    * The pre-initialized layout is equivalent to the undefined layout for
-    * optimally-tiled images. Anv only exposes support for optimally-tiled
-    * depth buffers.
    */
-   if (image->aux_usage != ISL_AUX_USAGE_HIZ ||
-       initial_layout == final_layout ||
-       initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ||
-       initial_layout == VK_IMAGE_LAYOUT_PREINITIALIZED)
+   if (image->aux_usage != ISL_AUX_USAGE_HIZ || initial_layout == final_layout)
      return;

   const bool hiz_enabled = ISL_AUX_USAGE_HIZ ==
@@ -376,6 +369,30 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
      anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
 }

+static void
+transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
+                        const struct anv_image *image,
+                        VkImageLayout initial_layout,
+                        VkImageLayout final_layout,
+                        const struct isl_view *view,
+                        const VkImageSubresourceRange *subresourceRange)
+{
+   if (image->aux_usage != ISL_AUX_USAGE_CCS_E)
+      return;
+
+   if (initial_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
+       initial_layout != VK_IMAGE_LAYOUT_PREINITIALIZED)
+      return;
+
+#if GEN_GEN >= 9
+   /* We're transitioning from an undefined layout so it doesn't really matter
+    * what data ends up in the color buffer.  We do, however, need to ensure
+    * that the CCS has valid data in it.  One easy way to do that is to
+    * fast-clear the specified range.
+    */
+   anv_image_ccs_clear(cmd_buffer, image, view, subresourceRange);
+#endif
+}

 /**
 * Setup anv_cmd_state::attachments for vkCmdBeginRenderPass.
@@ -963,6 +980,14 @@ void genX(CmdPipelineBarrier)(
                                 pImageMemoryBarriers[i].oldLayout,
                                 pImageMemoryBarriers[i].newLayout);
      }
+      if (pImageMemoryBarriers[i].subresourceRange.aspectMask &
+          VK_IMAGE_ASPECT_COLOR_BIT) {
+         transition_color_buffer(cmd_buffer, image,
+                                 pImageMemoryBarriers[i].oldLayout,
+                                 pImageMemoryBarriers[i].newLayout,
+                                 NULL,
+                                 &pImageMemoryBarriers[i].subresourceRange);
+      }
   }

   cmd_buffer->state.pending_pipe_bits |=
@@ -2312,8 +2337,9 @@ cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const cmd_buffer,
       */
      assert(att_ref->attachment < cmd_state->framebuffer->attachment_count);

-      const struct anv_image * const image =
-         cmd_state->framebuffer->attachments[att_ref->attachment]->image;
+      const struct anv_image_view * const iview =
+         cmd_state->framebuffer->attachments[att_ref->attachment];
+      const struct anv_image * const image = iview->image;

      /* Perform the layout transition. */
      if (image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
@@ -2323,6 +2349,11 @@ cmd_buffer_subpass_transition_layouts(struct anv_cmd_buffer * const cmd_buffer,
            anv_layout_to_aux_usage(&cmd_buffer->device->info, image,
                                    image->aspects, target_layout);
      }
+      if (image->aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+         transition_color_buffer(cmd_buffer, image,
+                                 att_state->current_layout, target_layout,
+                                 &iview->isl, NULL);
+      }

      att_state->current_layout = target_layout;
   }
--- a/src/intel/vulkan/genX_query.c
+++ b/src/intel/vulkan/genX_query.c
@@ -39,6 +39,7 @@ VkResult genX(CreateQueryPool)(
    VkQueryPool*                                pQueryPool)
 {
   ANV_FROM_HANDLE(anv_device, device, _device);
+   const struct anv_physical_device *pdevice = &device->instance->physicalDevice;
   struct anv_query_pool *pool;
   VkResult result;

@@ -90,6 +91,12 @@ VkResult genX(CreateQueryPool)(
   if (result != VK_SUCCESS)
      goto fail;

+   if (pdevice->supports_48bit_addresses)
+      pool->bo.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+   if (pdevice->has_exec_async)
+      pool->bo.flags |= EXEC_OBJECT_ASYNC;
+
   /* For query pools, we set the caching mode to I915_CACHING_CACHED.  On LLC
    * platforms, this does nothing.  On non-LLC platforms, this means snooping
    * which comes at a slight cost.  However, the buffers aren't big, won't be
--- a/src/intel/vulkan/tests/block_pool_no_free.c
+++ b/src/intel/vulkan/tests/block_pool_no_free.c
@@ -107,7 +107,10 @@ static void validate_monotonic(uint32_t **blocks)

 static void run_test()
 {
-   struct anv_device device;
+   struct anv_instance instance;
+   struct anv_device device = {
+      .instance = &instance,
+   };
   struct anv_block_pool pool;

   pthread_mutex_init(&device.mutex, NULL);
--- a/src/intel/vulkan/tests/state_pool.c
+++ b/src/intel/vulkan/tests/state_pool.c
@@ -34,7 +34,10 @@

 int main(int argc, char **argv)
 {
-   struct anv_device device;
+   struct anv_instance instance;
+   struct anv_device device = {
+      .instance = &instance,
+   };
   struct anv_block_pool block_pool;
   struct anv_state_pool state_pool;

--- a/src/intel/vulkan/tests/state_pool_free_list_only.c
+++ b/src/intel/vulkan/tests/state_pool_free_list_only.c
@@ -33,7 +33,10 @@

 int main(int argc, char **argv)
 {
-   struct anv_device device;
+   struct anv_instance instance;
+   struct anv_device device = {
+      .instance = &instance,
+   };
   struct anv_block_pool block_pool;
   struct anv_state_pool state_pool;

--- a/src/intel/vulkan/tests/state_pool_no_free.c
+++ b/src/intel/vulkan/tests/state_pool_no_free.c
@@ -54,7 +54,10 @@ static void *alloc_states(void *_job)

 static void run_test()
 {
-   struct anv_device device;
+   struct anv_instance instance;
+   struct anv_device device = {
+      .instance = &instance,
+   };
   struct anv_block_pool block_pool;
   struct anv_state_pool state_pool;

--- a/src/loader/Makefile.am
+++ b/src/loader/Makefile.am
@@ -55,6 +55,7 @@ libloader_la_LIBADD += \
 	$(LIBDRM_LIBS)
 endif

+if HAVE_PLATFORM_X11
 if HAVE_DRI3
 noinst_LTLIBRARIES += libloader_dri3_helper.la

@@ -63,3 +64,4 @@ libloader_dri3_helper_la_SOURCES = \
 	loader_dri3_helper.h
 libloader_dri3_helper_la_LIBADD = $(XCB_DRI3_LIBS)
 endif
+endif
--- a/src/mesa/drivers/dri/Android.mk
+++ b/src/mesa/drivers/dri/Android.mk
@@ -51,7 +51,8 @@ MESA_DRI_SHARED_LIBRARIES := \
 	libdl \
 	libexpat \
 	libglapi \
-	liblog
+	liblog \
+	libz

 #-----------------------------------------------
 # Build drivers and libmesa_dri_common
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -113,7 +113,7 @@ apply_gen6_stencil_hiz_offset(struct isl_surf *surf,
                              uint32_t lod,
                              uint32_t *offset)
 {
-   assert(mt->array_layout == ALL_SLICES_AT_EACH_LOD);
+   assert(mt->array_layout == GEN6_HIZ_STENCIL);

   if (mt->format == MESA_FORMAT_S_UINT8) {
      /* Note: we can't compute the stencil offset using
@@ -172,12 +172,12 @@ blorp_surf_for_miptree(struct brw_context *brw,
   };

   if (brw->gen == 6 && mt->format == MESA_FORMAT_S_UINT8 &&
-       mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
-      /* Sandy bridge stencil and HiZ use this ALL_SLICES_AT_EACH_LOD hack in
+       mt->array_layout == GEN6_HIZ_STENCIL) {
+      /* Sandy bridge stencil and HiZ use this GEN6_HIZ_STENCIL hack in
       * order to allow for layered rendering.  The hack makes each LOD of the
       * stencil or HiZ buffer a single tightly packed array surface at some
       * offset into the surface.  Since ISL doesn't know how to deal with the
-       * crazy ALL_SLICES_AT_EACH_LOD layout and since we have to do a manual
+       * crazy GEN6_HIZ_STENCIL layout and since we have to do a manual
       * offset of it anyway, we might as well do the offset here and keep the
       * hacks inside the i965 driver.
       *
@@ -251,8 +251,7 @@ blorp_surf_for_miptree(struct brw_context *brw,

         struct intel_mipmap_tree *hiz_mt = mt->hiz_buf->mt;
         if (hiz_mt) {
-            assert(brw->gen == 6 &&
-                   hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD);
+            assert(brw->gen == 6 && hiz_mt->array_layout == GEN6_HIZ_STENCIL);

            /* gen6 requires the HiZ buffer to be manually offset to the
             * right location.  We could fixup the surf but it doesn't
@@ -876,6 +875,22 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
      DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

+      /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+       *
+       *    "Any transition from any value in {Clear, Render, Resolve} to a
+       *    different value in {Clear, Render, Resolve} requires end of pipe
+       *    synchronization."
+       *
+       * In other words, fast clear ops are not properly synchronized with
+       * other drawing.  We need to use a PIPE_CONTROL to ensure that the
+       * contents of the previous draw hit the render target before we fast
+       * clear and again afterwards to ensure that the fast clear is complete
+       * before we do any more regular drawing.
+       */
+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                  PIPE_CONTROL_CS_STALL);
+
      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
      blorp_fast_clear(&batch, &surf,
@@ -884,6 +899,10 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                       x0, y0, x1, y1);
      blorp_batch_finish(&batch);

+      brw_emit_pipe_control_flush(brw,
+                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                                  PIPE_CONTROL_CS_STALL);
+
      /* Now that the fast clear has occurred, put the buffer in
       * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
       * redundant clears.
@@ -909,17 +928,6 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
      blorp_batch_finish(&batch);
   }

-   /*
-    * Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-    *
-    *  Any transition from any value in {Clear, Render, Resolve} to a
-    *  different value in {Clear, Render, Resolve} requires end of pipe
-    *  synchronization.
-    */
-   brw_emit_pipe_control_flush(brw,
-                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
-                               PIPE_CONTROL_CS_STALL);
-
   return true;
 }

@@ -981,6 +989,23 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
      resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
   }

+   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
+    *
+    *    "Any transition from any value in {Clear, Render, Resolve} to a
+    *    different value in {Clear, Render, Resolve} requires end of pipe
+    *    synchronization."
+    *
+    * In other words, fast clear ops are not properly synchronized with
+    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
+    * contents of the previous draw hit the render target before we resolve
+    * and again afterwards to ensure that the resolve is complete before we
+    * do any more regular drawing.
+    */
+   brw_emit_pipe_control_flush(brw,
+                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
+                               PIPE_CONTROL_CS_STALL);
+
+
   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_ccs_resolve(&batch, &surf, level, layer,
@@ -988,13 +1013,7 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
                     resolve_op);
   blorp_batch_finish(&batch);

-   /*
-    * Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
-    *
-    *  Any transition from any value in {Clear, Render, Resolve} to a
-    *  different value in {Clear, Render, Resolve} requires end of pipe
-    *  synchronization.
-    */
+   /* See comment above */
   brw_emit_pipe_control_flush(brw,
                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_CS_STALL);
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -94,14 +94,14 @@ brw_isl_format_for_mesa_format(mesa_format mesa_format)
      [MESA_FORMAT_L_SRGB8] = ISL_FORMAT_L8_UNORM_SRGB,
      [MESA_FORMAT_L8A8_SRGB] = ISL_FORMAT_L8A8_UNORM_SRGB,
      [MESA_FORMAT_A8L8_SRGB] = 0,
-      [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_DXT1_RGB_SRGB,
+      [MESA_FORMAT_SRGB_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT1] = ISL_FORMAT_BC1_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT3] = ISL_FORMAT_BC2_UNORM_SRGB,
      [MESA_FORMAT_SRGBA_DXT5] = ISL_FORMAT_BC3_UNORM_SRGB,

      [MESA_FORMAT_RGB_FXT1] = ISL_FORMAT_FXT1,
      [MESA_FORMAT_RGBA_FXT1] = ISL_FORMAT_FXT1,
-      [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_DXT1_RGB,
+      [MESA_FORMAT_RGB_DXT1] = ISL_FORMAT_BC1_UNORM,
      [MESA_FORMAT_RGBA_DXT1] = ISL_FORMAT_BC1_UNORM,
      [MESA_FORMAT_RGBA_DXT3] = ISL_FORMAT_BC2_UNORM,
      [MESA_FORMAT_RGBA_DXT5] = ISL_FORMAT_BC3_UNORM,
@@ -541,17 +541,6 @@ translate_tex_format(struct brw_context *brw,
       */
      return ISL_FORMAT_R32G32B32A32_FLOAT;

-   case MESA_FORMAT_SRGB_DXT1:
-      if (brw->gen == 4 && !brw->is_g4x) {
-         /* Work around missing SRGB DXT1 support on original gen4 by just
-          * skipping SRGB decode.  It's not worth not supporting sRGB in
-          * general to prevent this.
-          */
-         WARN_ONCE(true, "Demoting sRGB DXT1 texture to non-sRGB\n");
-         mesa_format = MESA_FORMAT_RGB_DXT1;
-      }
-      return brw_isl_format_for_mesa_format(mesa_format);
-
   case MESA_FORMAT_RGBA_ASTC_4x4:
   case MESA_FORMAT_RGBA_ASTC_5x4:
   case MESA_FORMAT_RGBA_ASTC_5x5:
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -216,6 +216,8 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
      mt->total_height = MAX2(mt->total_height, y + img_height);

      /* Layout_below: step right after second mipmap.
+       *
+       * For Sandy Bridge HiZ and stencil, we always step down.
       */
      if (level == mt->first_level + 1) {
 	 x += ALIGN_NPOT(width, mt->halign) / bw;
@@ -231,6 +233,67 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
   }
 }

+static void
+brw_miptree_layout_gen6_hiz_stencil(struct intel_mipmap_tree *mt)
+{
+   unsigned x = 0;
+   unsigned y = 0;
+   unsigned width = mt->physical_width0;
+   unsigned height = mt->physical_height0;
+   /* Number of layers of array texture. */
+   unsigned depth = mt->physical_depth0;
+   unsigned tile_width, tile_height, bw, bh;
+
+   if (mt->format == MESA_FORMAT_S_UINT8) {
+      bw = bh = 1;
+      /* W-tiled */
+      tile_width = 64;
+      tile_height = 64;
+   } else {
+      assert(_mesa_get_format_base_format(mt->format) == GL_DEPTH_COMPONENT ||
+             _mesa_get_format_base_format(mt->format) == GL_DEPTH_STENCIL);
+      /* Each 128-bit HiZ block corresponds to a region of 8x4 depth
+       * samples.  Each cache line in the Y-Tiled HiZ image contains 2x2 HiZ
+       * blocks.  Therefore, each Y-tiled cache line corresponds to a 16x8
+       * region in the depth surface.  Since we're representing it as
+       * RGBA_FLOAT32, the miptree calculations will think that each cache
+       * line is 1x4 pixels.  Therefore, we need a scale-down factor of 16x2
+       * and a vertical alignment of 2.
+       */
+      mt->cpp = 16;
+      bw = 16;
+      bh = 2;
+      /* Y-tiled */
+      tile_width = 128 / mt->cpp;
+      tile_height = 32;
+   }
+
+   mt->total_width = 0;
+   mt->total_height = 0;
+
+   for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
+      intel_miptree_set_level_info(mt, level, x, y, depth);
+
+      const unsigned img_width = ALIGN(DIV_ROUND_UP(width, bw), mt->halign);
+      const unsigned img_height =
+         ALIGN(DIV_ROUND_UP(height, bh), mt->valign) * depth;
+
+      mt->total_width = MAX2(mt->total_width, x + img_width);
+      mt->total_height = MAX2(mt->total_height, y + img_height);
+
+      if (level == mt->first_level) {
+         y += ALIGN(img_height, tile_height);
+      } else {
+         x += ALIGN(img_width, tile_width);
+      }
+
+      /* We only minify the width.  We want qpitch to match for all miplevels
+       * because the hardware doesn't know we aren't on LOD0.
+       */
+      width = minify(width, 1);
+   }
+}
+
 unsigned
 brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
                                       const struct intel_mipmap_tree *mt,
@@ -249,6 +312,8 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
                                     const struct intel_mipmap_tree *mt,
                                     unsigned level)
 {
+   assert(mt->array_layout != GEN6_HIZ_STENCIL || brw->gen == 6);
+
   if (brw->gen >= 9) {
      /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will
       * effectively end up with a packed qpitch anyway whenever
@@ -281,6 +346,15 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
              mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
      return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign);

+   } else if (mt->array_layout == GEN6_HIZ_STENCIL) {
+      /* For HiZ and stencil on Sandy Bridge, we don't minify the height. */
+      if (mt->format == MESA_FORMAT_S_UINT8) {
+         return ALIGN(mt->physical_height0, mt->valign);
+      } else {
+         /* HiZ has a vertical scale factor of 2. */
+         return ALIGN(DIV_ROUND_UP(mt->physical_height0, 2), mt->valign);
+      }
+
   } else {
      const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign);
      const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign);
@@ -333,6 +407,8 @@ brw_miptree_layout_texture_array(struct brw_context *brw,

   if (layout_1d)
      gen9_miptree_layout_1d(mt);
+   else if (mt->array_layout == GEN6_HIZ_STENCIL)
+      brw_miptree_layout_gen6_hiz_stencil(mt);
   else
      brw_miptree_layout_2d(mt);

@@ -556,6 +632,8 @@ intel_miptree_set_total_width_height(struct brw_context *brw,
      case INTEL_MSAA_LAYOUT_IMS:
         if (gen9_use_linear_1d_layout(brw, mt))
            gen9_miptree_layout_1d(mt);
+         else if (mt->array_layout == GEN6_HIZ_STENCIL)
+            brw_miptree_layout_gen6_hiz_stencil(mt);
         else
            brw_miptree_layout_2d(mt);
         break;
@@ -579,15 +657,9 @@ intel_miptree_set_alignment(struct brw_context *brw,
    * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
    * - BSpec (for Ivybridge and slight variations in separate stencil)
    */
-   bool gen6_hiz_or_stencil = false;

-   if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
-      const GLenum base_format = _mesa_get_format_base_format(mt->format);
-      gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
-   }
-
-   if (gen6_hiz_or_stencil) {
-      /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
+   if (mt->array_layout == GEN6_HIZ_STENCIL) {
+      /* On gen6, we use GEN6_HIZ_STENCIL for stencil/hiz because the
       * hardware doesn't support multiple mip levels on stencil/hiz.
       *
       * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
@@ -600,15 +672,13 @@ intel_miptree_set_alignment(struct brw_context *brw,
         /* Stencil uses W tiling, so we force W tiling alignment for the
          * ALL_SLICES_AT_EACH_LOD miptree layout.
          */
-         mt->halign = 64;
-         mt->valign = 64;
+         mt->halign = 4;
+         mt->valign = 2;
         assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
      } else {
-         /* Depth uses Y tiling, so we force need Y tiling alignment for the
-          * ALL_SLICES_AT_EACH_LOD miptree layout.
-          */
-         mt->halign = 128 / mt->cpp;
-         mt->valign = 32;
+         /* See brw_miptree_layout_gen6_hiz_stencil() */
+         mt->halign = 1;
+         mt->valign = 2;
      }
   } else if (mt->compressed) {
       /* The hardware alignment requirements for compressed textures
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -389,7 +389,9 @@ brw_get_texture_swizzle(const struct gl_context *ctx,
   case GL_RED:
   case GL_RG:
   case GL_RGB:
-      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
+      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0 ||
+          img->TexFormat == MESA_FORMAT_RGB_DXT1 ||
+          img->TexFormat == MESA_FORMAT_SRGB_DXT1)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }
--- a/Show More
+++ b/Show More