docs: add sha256 checksums for 17.2.8

Signed-off-by: Andres Gomez <agomez@igalia.com>
docs: add release notes for 17.2.8
2017-12-23 00:54:11 +02:00 · 2017-12-22 22:39:47 +02:00 · 2017-12-22 22:34:12 +02:00 · 2017-12-20 19:40:38 +02:00 · 2017-12-20 19:40:38 +02:00 · 2017-12-20 19:40:38 +02:00
340 changed files with 8111 additions and 2658 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS=""
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--disable-libunwind"
      addons:
        apt:
          packages:
@@ -66,6 +67,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS="swr"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          sources:
@@ -81,6 +83,7 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Gallium Drivers Other"
        - BUILD=make
@@ -93,6 +96,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          sources:
@@ -108,6 +112,7 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        # NOTE: Analogous to SWR above, building Clover is quite slow.
        - LABEL="make Gallium ST Clover"
@@ -125,6 +130,7 @@ matrix:
        # Regardless - we're doing a quick build test here.
        - GALLIUM_DRIVERS="i915"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          sources:
@@ -144,11 +150,14 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Gallium ST Other"
        - BUILD=make
        - MAKEFLAGS="-j4"
        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.3
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
        - DRI_DRIVERS=""
        - GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx --enable-gallium-osmesa"
@@ -157,9 +166,12 @@ matrix:
        # Regardless - we're doing a quick build test here.
        - GALLIUM_DRIVERS="i915,swrast"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          packages:
+            # We actually want to test against llvm-3.3
+            - llvm-3.3-dev
            # Nine requires gcc 4.6... which is the one we have right ?
            - libxvmc-dev
            # Build locally, for now.
@@ -174,6 +186,7 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Vulkan"
        - BUILD=make
@@ -186,6 +199,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS=""
        - VULKAN_DRIVERS="intel,radeon"
+        - LIBUNWIND_FLAGS="--disable-libunwind"
      addons:
        apt:
          sources:
@@ -367,6 +381,7 @@ script:
      export CC="$CC -isystem`pwd`";

      ./autogen.sh --enable-debug
+        $LIBUNWIND_FLAGS
        $DRI_LOADERS
        --with-dri-drivers=$DRI_DRIVERS
        $GALLIUM_ST
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -88,6 +88,10 @@ LOCAL_CFLAGS += \

 endif
 endif
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+LOCAL_CFLAGS_arm += -DUSE_ARM_ASM
+endif
+LOCAL_CFLAGS_arm64 += -DUSE_AARCH64_ASM

 ifneq ($(LOCAL_IS_HOST_MODULE),true)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
--- a/Android.mk
+++ b/Android.mk
@@ -92,16 +92,12 @@ define mesa-build-with-llvm
  $(if $(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5), \
    $(warning Unsupported LLVM version in Android $(MESA_ANDROID_MAJOR_VERSION)),) \
  $(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)), \
-    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0) \
-    $(eval LOCAL_STATIC_LIBRARIES += libLLVMCore) \
-    $(eval LOCAL_C_INCLUDES += external/llvm/include external/llvm/device/include),) \
+    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0)) \
  $(if $(filter 7,$(MESA_ANDROID_MAJOR_VERSION)), \
-    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0) \
-    $(eval LOCAL_STATIC_LIBRARIES += libLLVMCore) \
-    $(eval LOCAL_C_INCLUDES += external/llvm/include external/llvm/device/include),) \
+    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0)) \
  $(if $(filter O,$(MESA_ANDROID_MAJOR_VERSION)), \
-    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0309 -DMESA_LLVM_VERSION_PATCH=0) \
-    $(eval LOCAL_HEADER_LIBRARIES += llvm-headers),)
+    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0309 -DMESA_LLVM_VERSION_PATCH=0)) \
+  $(eval LOCAL_SHARED_LIBRARIES += libLLVM)
 endef

 # add subdirectories
--- a/Makefile.am
+++ b/Makefile.am
@@ -41,6 +41,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-xa \
 	--enable-xvmc \
 	--enable-llvm-shared-libs \
+	--enable-libunwind \
 	--with-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
 	--with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
--- a/8
+++ b/8
@@ -50,10 +50,10 @@ except KeyError:
    pass
 else:
    targets = targets.split(',')
-    print 'scons: warning: targets option is deprecated; pass the targets on their own such as'
-    print
-    print '  scons %s' % ' '.join(targets)
-    print
+    print('scons: warning: targets option is deprecated; pass the targets on their own such as')
+    print()
+    print('  scons %s' % ' '.join(targets))
+    print()
    COMMAND_LINE_TARGETS.append(targets)


--- a/2
+++ b/2
@@ -1 +1 @@
-17.2.0-devel
+17.2.8
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,161 @@
+# fixes:  The commits are too invasive for stable. Instead the offending patches
+#         causing regressions have been reverted.
+365d34540f331df57780dddf8da87235be0a6bcb mesa: correctly calculate the storage offset for i915
+de0e62e1065e2d9172acf3ab7c70bba0160125c8 st/mesa: correctly calculate the storage offset
+
+# stable: Add loader::getCapability patches. It's rather invasive infra
+#         not suitable as a bugfix.
+1bf703e4ea5c4f742bc7ba55d01e5afc3f4e11f9 dri_interface,egl,gallium: only expose RGBA visuals on Android
+be5773fa8dfe9255d9abaf5c7d5bbbd2d922da08 Android: fix compile error for DRI2 loader getCapability
+31a6750988d7dd431f72ff1ff11bfca83bde5d8c st/dri: NULL check before deref DRI loader .getCapability
+
+# stable: The commit addresses code that did not land in the stable branch
+31bb8517a194af733deefe2d821537d994d39365 radv/gfx9: fix tile swizzle handling for gfx9
+
+# stable: Commit is not applicable when 4fab67a4415 is missing.
+d496780fb2c7f2cf0e32b6a79dc528e5156dfcb3 intel/eu/validate: Look up types on demand in execution_type()
+
+# fixes: Depend on preceding commit which adds new public GBM API
+3a5e3aa5a53cff55a5e31766d713a41ffa5a93d7 egl/drm: Fix misused x and y offsets in swrast_put_image2()
+fe2a6281b3b299998fe7399e7dbcc2077d773824 egl/drm: Fix misused x and y offsets in swrast_get_image()
+
+# fixes: This commit addressed an earlier commit c7e9ebb3ab8 which did not
+#        land in branch
+45c5c444518b7e83d9accd9f44702fa49282a3b8 radeonsi/gfx9: proper workaround for LS/HS VGPR initialization bug
+
+# fixes: This commit addressed earlier commits 61ad2f13 and 6dcc54b4 which did
+#        not land in branch
+979978ee06867a531b8d56cee252f5c83920a339 radv: Check for GFX9 for 1D arrays in image_size intrinsic.
+
+# fixes: This commit addressed earlier commits dcf46e99 and 60878dd0 which did
+#        not land in branch
+8e9e339c530c7b82b5a29d4b3183e8f5a01eae28 radv: copy the number of viewports/scissors at pipeline bind time
+
+# stable: The commit regresses a few dEQP tests. Namely:
+#         dEQP-VK.api.copy_and_blit.core.buffer_to_buffer.partial
+#         dEQP-VK.api.copy_and_blit.dedicated_allocation.buffer_to_buffer.partial
+14555d0b7a51bd3701764fd213c2459410143431 anv: Remove unreachable cases from isl_format_for_size()
+
+# stable: The commit addresses earlier commit a62a9793357 which is not applicable
+#         for the stable branch
+6c7720ed78db754d52f204cbb74897aa9e65ea7e anv/wsi: Allocate enough memory for the entire image
+
+# stable: Commits are too invasive for 17.2.
+98fdff7247b6877d028d33284f9cc63189ee204e configure.ac: factor out detection for old and buggy llvm
+13a53c4f5cdd664fd155c9e78fb46a4387af006c configure.ac: rework llvm libs handling for 3.9+
+a7ecf7b86f4eae59f3ceac2125e5d1725c403c07 Travis: add binutils 2.26 for a few more LLVM 3.9 builds
+36d6d1e931936a80da327889862ba02942ac427b configure.ac: add llvm_add_optional_component helper
+df3a43018020c16c1dfa88a76c9a84c9fb85be38 configure.ac: add missing LLVM components for OpenCL
+
+# stable: Commit is too big for stable at this point.
+4d24a7cb97641cacecd371d1968f6964785822e4 glsl: fix derived cs variables
+
+# stable: 17.3 nomination only.
+fee9d05e2136b2b7c5a1ad2be7180b99f733f539 radv: Update code pointer correctly if a variant is already created
+
+# stable: 17.3 nomination only.
+d8cefaa197f02944812ef535b1b303dd5bf26848 radv: use device name in cache creation like radeonsi.
+
+# fixes:  This commit addressed earlier commit 35ac13ed3 which did not
+#         land in branch.
+11d688d9f0d2ee4d0178d1807c0075e5e8364b1d mesa/bufferobj: don't double negate the range
+
+# extra:  Commit is not applicable when ade416d0236 is missing.
+07bfdb478bf844a0ac9cf3679f51f83c4abea5a1 broadcom/vc5: Propagate vc4 aliasing fix to vc5.
+
+# stable: This commit addressed earlier commit 8d90e28839 which did
+#         not land in branch.
+446c5726ecb968d06a6607e0df42be1cb74948c4 i965: fix blorp stage_prog_data->param leak
+
+# stable: This commit addressed earlier commit 78ade659569 which did
+#         not land in branch.
+8fbd82f464f26a56167f7962174b2b69756a105a etnaviv: don't do resolve-in-place without valid TS
+
+# stable: This commit addressed earlier commit 8d90e28839 which did
+#         not land in branch.
+7b4387519c382cffef9c62bbbbefcfe71cfde905 intel/fs: Alloc pull constants off mem_ctx
+
+# stable: 17.3 nomination only.
+3f8e3c2bd8f54ae6817f7496be47f4e1a8860d9c radeonsi: add a workaround for weird s_buffer_load_dword behavior on SI
+7dae419aa7c34af820c08896acef3b65d855188e Android: move drivers' symlinks to /vendor (v2)
+
+# fixes:  This commit has more than one Fixes tag but the commit it
+#         addresses didn't land in branch.
+e17e8934f9e4b008bdfb4f9abd8ed4faa604c7d9 automake: include git_sha1.h.in in release tarball
+
+# stable: This commit is not really needed after 6ac2d169019.
+e8c9e65185de3e821e1e482e77906d1d51efa3ec intel/fs: Use a pure vertical stride for large register strides
+
+# stable: These commits addressed earlier commit 379b24a40d3 which did
+#         not land in branch.
+7364f080f9a272323ed3491f278a1eed3eb9b1a7 intel/nir: Add a helper for getting the NoIndirect mask
+3e63cf893f096a7263eb1856d58417dd2d170d4b intel/nir: Break the linking code into a helper in brw_nir.c
+951a5dc4cc29da996b54ae63eeba1915a3a65b4a intel/nir: Use the correct indirect lowering masks in link_shaders
+
+# stable: These commits resulted in a CTS regression being addressed
+#         at https://bugs.freedesktop.org/show_bug.cgi?id=103626 .
+18fde36ced4279f2577097a1a7d31b55f2f5f141 intel/fs: Use the original destination region for int MUL lowering
+
+# stable: These commits are refactorings rather than fixes.
+fcd4adb9d08094520fb8d118d3448b04c6ec1fd1 intel/fs: Pass builders instead of blocks into emit_[un]zip
+0d905597fe2997c89022c76cdf84dc4fba5eb055 intel/fs: Be more explicit about our placement of [un]zip
+6c00240bc650805e0b66aa6e17dbe69bbe41e446 intel/fs: Don't stomp f0.1 in SIMD16 ballot
+
+# stable: This commit addressed earlier commit ea1b97714d9b which did
+#         not land in branch.
+50330d7115f0d5050ec3cfe6bca2b0136222e097 r600/shader: reserve first register of vertex shader.
+
+# stable: This commit depends on earlier commit 3735af04152b which did
+#         not land in branch.
+a6cc361e5fd2450249847d5ee8093d26ed7ff545 anv/cmd_buffer: Advance the address when initializing clear colors
+
+# stable: This commit addressed earlier commit a62a97933578 which did
+#         not land in branch.
+a07f7b26198ce0f5c8799481a673754968ac5daf anv/cmd_buffer: Take bo_offset into account in fast clear state addresses
+
+# stable: These commits addressed earlier commit 2c4097aff1b which did
+#         not land in branch.
+344252a27f8d875572bbe65641a825af8e73845d i965/bufmgr: Add a helper to mark a BO as external
+0a6a137eb27129e17298cfe9dd620205588ee4f6 i965: Mark BOs as external when we export their handle
+
+# stable: 17.3 nomination only.
+6e4d65f674a70809e6df1a4f716f874828915562 broadcom/vc5: Add vc5_drm.h to the release tarball
+4639cc716e89c69da41c7b54fa938457000fbd4c intel/blorp: Use mocs.tex for depth stencil
+deec84fd771876b5c0755293376df11bc95b473b anv/blorp: Add a device parameter to blorp_surf_for_anv_image
+bc933d0e8462871e19328f66182c35543e334013 intel/blorp: Make the MOCS setting part of blorp_address
+d7a19d69ebc032ba7207fc97bc6f10d5bb35bb99 i965: Use PTE MOCS for all external buffers
+
+# fixes:  This commit is only a typo correction on an error message.
+a6932faae1074445210d392a80b94fdac147b255 glsl: Fix typo fragement -> fragment
+
+# fixes:  This commit makes reference to 2 other commits but none have
+#         made it to the 17.2 queue.
+9b0223046668593deb9c0be0b557994bb5218788 egl: pass the dri2_dpy to the $plat_teardown functions
+
+# extra:  The commit just references a proper fix that has already
+#         landed.
+a31d0382084c8aa860ffcef9b12592c5c44e192f Revert "intel/fs: Use a pure vertical stride for large register strides"
+
+# stable: The commit depends on at least one other that did not land in
+#          branch - 8b3a2578519.
+010214b403de1b5e25a549372ba6192b89e05d06 radeonsi: allow DMABUF exports for local buffers
+
+# stable: This commit addressed earlier commit ead0dfe31ec7 which did
+#         not land in branch.
+709f5bdc4a2bf31f422f5cf60797224c0463c10a swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.
+
+# stable: 17.3 nomination only.
+bf0904e31fb7d9cd8932d582076c8d7beb02ba89 winsys/amdgpu: disable local BOs again due to worse performance
+35c3cbad3c30ad3d40a6811dd6ca2286e013bfc5 radeonsi: don't call force_dcc_off for buffers
+
+# fixes:  This commit addressed earlier commit d1c9f30d7ff7 which did
+#         not land in branch.
+1bdeac545f4ea9f7ca6947f5da7fcf4f5b3010dc radv: port merge tess info from anv
+
+# extra:  The commit just references a fix for an additional change in its v2.
+c1ff99fd70cd2ceb2cac4723e4fd5efc93834746 main: Clear shader program data whenever ProgramBinary is called
+
+# extra:  The commit references a previous commit in which the changes
+#         should have been included but, as clarified by the
+#         developer, it is not needed for stable.
+71e630753ebbee82e8f8709da5488296b2c070c8 r600: set DX10_CLAMP for compute shader too
--- a/configure.ac
+++ b/configure.ac
@@ -410,8 +410,21 @@ int main() {
 }]])], GCC_ATOMIC_BUILTINS_SUPPORTED=1)
 if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
    DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
+    dnl On some platforms, new-style atomics need a helper library
+    AC_MSG_CHECKING(whether -latomic is needed)
+    AC_LINK_IFELSE([AC_LANG_SOURCE([[
+    #include <stdint.h>
+    uint64_t v;
+    int main() {
+        return (int)__atomic_load_n(&v, __ATOMIC_ACQUIRE);
+    }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes)
+    AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC)
+    if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then
+        LIBATOMIC_LIBS="-latomic"
+    fi
 fi
 AM_CONDITIONAL([GCC_ATOMIC_BUILTINS_SUPPORTED], [test x$GCC_ATOMIC_BUILTINS_SUPPORTED = x1])
+AC_SUBST([LIBATOMIC_LIBS])

 dnl Check if host supports 64-bit atomics
 dnl note that lack of support usually results in link (not compile) error
@@ -773,6 +786,20 @@ if test "x$enable_asm" = xyes; then
            ;;
        esac
        ;;
+    aarch64)
+        case "$host_os" in
+        linux*)
+            asm_arch=aarch64
+            ;;
+        esac
+        ;;
+    arm)
+        case "$host_os" in
+        linux*)
+            asm_arch=arm
+            ;;
+        esac
+        ;;
    esac

    case "$asm_arch" in
@@ -792,6 +819,14 @@ if test "x$enable_asm" = xyes; then
        DEFINES="$DEFINES -DUSE_PPC64LE_ASM"
        AC_MSG_RESULT([yes, ppc64le])
        ;;
+    aarch64)
+        DEFINES="$DEFINES -DUSE_AARCH64_ASM"
+        AC_MSG_RESULT([yes, aarch64])
+        ;;
+    arm)
+        DEFINES="$DEFINES -DUSE_ARM_ASM"
+        AC_MSG_RESULT([yes, arm])
+        ;;
    *)
        AC_MSG_RESULT([no, platform not supported])
        ;;
@@ -803,6 +838,28 @@ AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
 AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
+AC_CHECK_FUNC([memfd_create], [DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"])
+
+AC_MSG_CHECKING([whether strtod has locale support])
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+    #define _GNU_SOURCE
+    #include <stdlib.h>
+    #include <locale.h>
+    #ifdef HAVE_XLOCALE_H
+    #include <xlocale.h>
+    #endif
+    int main() {
+       locale_t loc = newlocale(LC_CTYPE_MASK, "C", NULL);
+       const char *s = "1.0";
+       char *end; /* out-parameter: its address is passed to the calls below */
+       double d = strtod_l(s, &end, loc);
+       float f = strtof_l(s, &end, loc);
+       freelocale(loc);
+       return 0;
+    }]])],
+  [DEFINES="$DEFINES -DHAVE_STRTOD_L"];
+   AC_MSG_RESULT([yes]),
+   AC_MSG_RESULT([no]))

 dnl Check to see if dlopen is in default libraries (like Solaris, which
 dnl has it in libc), or if libdl is needed to get it.
@@ -1360,18 +1417,10 @@ AC_ARG_ENABLE([libglvnd],
 AM_CONDITIONAL(USE_LIBGLVND, test "x$enable_libglvnd" = xyes)

 if test "x$enable_libglvnd" = xyes ; then
-    dnl XXX: update once we can handle more than libGL/glx.
-    dnl Namely: we should error out if neither of the glvnd enabled libraries
-    dnl are built
    case "x$enable_glx" in
-    xno)
-        AC_MSG_ERROR([cannot build libglvnd without GLX])
-        ;;
    xxlib | xgallium-xlib )
        AC_MSG_ERROR([cannot build libgvnd when Xlib-GLX or Gallium-Xlib-GLX is enabled])
        ;;
-    xdri)
-        ;;
    esac

    PKG_CHECK_MODULES([GLVND], libglvnd >= 0.2.0)
@@ -1380,6 +1429,10 @@ if test "x$enable_libglvnd" = xyes ; then

    DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
    DEFAULT_GL_LIB_NAME=GLX_mesa
+
+    if test "x$enable_glx" = xno -a "x$enable_egl" = xno; then
+        AC_MSG_ERROR([cannot build libglvnd without GLX or EGL])
+    fi
 fi

 AC_ARG_WITH([gl-lib-name],
@@ -2140,7 +2193,9 @@ if test "x$enable_xvmc" = xyes -o \
        "x$enable_vdpau" = xyes -o \
        "x$enable_omx" = xyes -o \
        "x$enable_va" = xyes; then
-    PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+    if echo $platforms | grep -q "x11"; then
+        PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+    fi
    need_gallium_vl_winsys=yes
 fi
 AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)
@@ -2551,7 +2606,7 @@ if test -n "$with_gallium_drivers"; then
            if test "x$HAVE_SWR_AVX" != xyes -a \
                    "x$HAVE_SWR_AVX2" != xyes -a \
                    "x$HAVE_SWR_KNL" != xyes -a \
-                    "x$HAVE_SWR_SKX" != xyes -a; then
+                    "x$HAVE_SWR_SKX" != xyes; then
               AC_MSG_ERROR([swr enabled but no swr architectures selected])
            fi

@@ -2735,6 +2790,8 @@ AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64)
 AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
 AM_CONDITIONAL(HAVE_SPARC_ASM, test "x$asm_arch" = xsparc)
 AM_CONDITIONAL(HAVE_PPC64LE_ASM, test "x$asm_arch" = xppc64le)
+AM_CONDITIONAL(HAVE_AARCH64_ASM, test "x$asm_arch" = xaarch64)
+AM_CONDITIONAL(HAVE_ARM_ASM, test "x$asm_arch" = xarm)

 AC_SUBST([NINE_MAJOR], 1)
 AC_SUBST([NINE_MINOR], 0)
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -130,27 +130,6 @@ mesa/demos repository.</p>
 runtime</p>

 <dl>
-<dt><code>EGL_DRIVERS_PATH</code></dt>
-<dd>
-
-<p>By default, the main library will look for drivers in the directory where
-the drivers are installed to.  This variable specifies a list of
-colon-separated directories where the main library will look for drivers, in
-addition to the default directory.  This variable is ignored for setuid/setgid
-binaries.</p>
-
-<p>This variable is usually set to test an uninstalled build.  For example, one
-may set</p>
-
-<pre>
-  $ export LD_LIBRARY_PATH=$mesa/lib
-  $ export EGL_DRIVERS_PATH=$mesa/lib/egl
-</pre>
-
-<p>to test a build without installation</p>
-
-</dd>
-
 <dt><code>EGL_DRIVER</code></dt>
 <dd>

--- a/docs/llvmpipe.html
+++ b/docs/llvmpipe.html
@@ -20,7 +20,7 @@
 The Gallium llvmpipe driver is a software rasterizer that uses LLVM to
 do runtime code generation.
 Shaders, point/line/triangle rasterization and vertex processing are
-implemented with LLVM IR which is translated to x86 or x86-64 machine
+implemented with LLVM IR which is translated to x86, x86-64, or ppc64le machine
 code.
 Also, the driver is multithreaded to take advantage of multiple CPU cores
 (up to 8 at this time).
@@ -32,24 +32,36 @@ It's the fastest software rasterizer for Mesa.

 <ul>
 <li>
-   <p>An x86 or amd64 processor; 64-bit mode recommended.</p>
   <p>
+   For x86 or amd64 processors, 64-bit mode is recommended.
   Support for SSE2 is strongly encouraged.  Support for SSE3 and SSE4.1 will
   yield the most efficient code.  The fewer features the CPU has the more
-   likely is that you run into underperforming, buggy, or incomplete code.
+   likely it is that you will run into underperforming, buggy, or incomplete code.
+   </p>
+   <p>
+   For ppc64le processors, use of the Altivec feature (the Vector
+   Facility) is recommended if supported; use of the VSX feature (the
+   Vector-Scalar Facility) is recommended if supported AND Mesa is
+   built with LLVM version 4.0 or later.
   </p>
   <p>
   See /proc/cpuinfo to know what your CPU supports.
   </p>
 </li>
 <li>
-   <p>LLVM: version 3.4 recommended; 3.3 or later required.</p>
+   <p>Unless otherwise stated, LLVM version 3.4 is recommended; 3.3 or later is required.</p>
   <p>
   For Linux, on a recent Debian based distribution do:
   </p>
 <pre>
     aptitude install llvm-dev
 </pre>
+   <p>
+   If you want development snapshot builds of LLVM for Debian and derived
+   distributions like Ubuntu, you can use the APT repository at <a
+   href="https://apt.llvm.org/" title="Debian Development packages for LLVM"
+   >apt.llvm.org</a>, which is maintained by Debian's LLVM maintainer.
+   </p>
   <p>
   For a RPM-based distribution do:
   </p>
@@ -228,8 +240,8 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
 </ul>

 <p>
-Some of this tests can output results and benchmarks to a tab-separated-file
-for posterior analysis, e.g.:
+Some of these tests can output results and benchmarks to a tab-separated file
+for later analysis, e.g.:
 </p>
 <pre>
  build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -240,8 +252,8 @@ for posterior analysis, e.g.:

 <ul>
 <li>
-  When looking to this code by the first time start in lp_state_fs.c, and 
-  then skim through the lp_bld_* functions called in there, and the comments
+  When looking at this code for the first time, start in lp_state_fs.c, and
+  then skim through the lp_bld_* functions called there, and the comments
  at the top of the lp_bld_*.c functions.
 </li>
 <li>
--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 17.2.0 Release Notes / TBD</h1>
+<h1>Mesa 17.2.0 Release Notes / September 4, 2017</h1>

 <p>
 Mesa 17.2.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+9484ad96b4bb6cda5bbf1aef52dfa35183dc21aa6258a2991c245996c2fdaf85  mesa-17.2.0.tar.gz
+3123448f770eae58bc73e15480e78909defb892f10ab777e9116c9b218094943  mesa-17.2.0.tar.xz
 </pre>


@@ -56,9 +57,156 @@ Note: some of the new features are only available with certain drivers.
 <h2>Bug fixes</h2>

 <ul>
-TBD
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68365">Bug 68365</a> - [SNB Bisected]Piglit spec_ARB_framebuffer_object_fbo-blit-stretch  fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77240">Bug 77240</a> - khrplatform.h not installed if EGL is disabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95530">Bug 95530</a> - Stellaris - colored overlay of sectors doesn't render on i965</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96449">Bug 96449</a> - Dying Light reports OpenGL version 3.0 with mesa-git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96958">Bug 96958</a> - [SKL] Improper rendering in Europa Universalis IV</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97957">Bug 97957</a> - Awful screen tearing in a separate X server with DRI3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98238">Bug 98238</a> - Witcher 2: objects are black when changing lod on Radeon Pitcairn</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99467">Bug 99467</a> - [radv] DOOM 2016 + wine. Green screen everywhere (but can be started)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100070">Bug 100070</a> - Rocket League: grass gets rendered incorrectly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100242">Bug 100242</a> - radeon buffer allocation failure during startup of Factorio</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100785">Bug 100785</a> - [regression, bisected] arb_gpu_shader5 piglit fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100871">Bug 100871</a> - gles cts hangs mesa indefinitely</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100877">Bug 100877</a> - vulkan/tests/block_pool_no_free regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100937">Bug 100937</a> - Mesa fails to build with GCC 4.8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100945">Bug 100945</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100988">Bug 100988</a> - glXGetCurrentDisplay() no longer works for FakeGLX contexts?</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101071">Bug 101071</a> - compiling glsl fails with undefined reference to `pthread_create'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101088">Bug 101088</a> - `gallium: remove pipe_index_buffer and set_index_buffer` causes glitches and crash in gallium nine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101110">Bug 101110</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101189">Bug 101189</a> - Latest git fails to compile with radeon</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101252">Bug 101252</a> - eglGetDisplay() is not thread safe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101254">Bug 101254</a> - VDPAU videos don't start playing with r600 gallium driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101283">Bug 101283</a> - skylake: page fault accessing address 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101284">Bug 101284</a> - [G45] ES2-CTS.functional.texture.specification.basic_copytexsubimage2d.cube_rgba</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101294">Bug 101294</a> - radeonsi minecraft forge splash freeze since 17.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101306">Bug 101306</a> - [BXT] gles asserts in cts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101326">Bug 101326</a> - gallium/wgl: Allow context creation without prior SetPixelFormat()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101334">Bug 101334</a> - AMD SI cards: Some vulkan apps freeze the system</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101336">Bug 101336</a> - glcpp-test.sh regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101340">Bug 101340</a> - i915_surface.c:108:4: error: too few arguments to function ‘util_blitter_default_src_texture’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101360">Bug 101360</a> - Assertion failure comparing result of ballotARB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101401">Bug 101401</a> - [REGRESSION][BISECTED] GDM fails to start after 8ec4975cd83365c791a1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101418">Bug 101418</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101451">Bug 101451</a> - [G33] ES2-CTS.functional.clipping.polygon regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101464">Bug 101464</a> - PrimitiveRestartNV inside a render list causes a crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101471">Bug 101471</a> - Mesa fails to build: unknown typename bool</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101535">Bug 101535</a> - [bisected] [Skylake] Kwin won't start and glxgears coredumps</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101538">Bug 101538</a> - From &quot;Use isl for hiz layouts&quot; commit onwards, everything crashes with Mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101539">Bug 101539</a> - [Regresion] [IVB] Segment fault in recent commit in intel_miptree_level_has_hiz under Ivy bridge</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101558">Bug 101558</a> - [regression][bisected] MPV playing video via opengl &quot;randomly&quot; results in only part of the window / screen being rendered with Mesa GIT.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101596">Bug 101596</a> - Blender renders black UI elements</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101607">Bug 101607</a> - Regression in anisotropic filtering from &quot;i965: Convert fs sampler state to use genxml&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101657">Bug 101657</a> - strtod.c:32:10: fatal error: xlocale.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101666">Bug 101666</a> - bitfieldExtract is marked as a built-in function on OpenGL ES 3.0, but was added in OpenGL ES 3.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101683">Bug 101683</a> - Some games hang while loading when compositing is shut off or absent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101703">Bug 101703</a> - No stencil buffer allocated when requested by GLUT</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101704">Bug 101704</a> - [regression][bisected] glReadPixels() from pbuffer failing in Android CTS camera tests</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101766">Bug 101766</a> - Assertion `!&quot;invalid type&quot;' failed when constant expression involves literal of different type</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101774">Bug 101774</a> - gen_clflush.h:37:7: error: implicit declaration of function ‘__builtin_ia32_clflush’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101775">Bug 101775</a> - Xorg segfault since 147d7fb &quot;st/mesa: add a winsys buffers list in st_context&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101829">Bug 101829</a> - read-after-free in st_framebuffer_validate</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101831">Bug 101831</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101851">Bug 101851</a> - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101867">Bug 101867</a> - Launch options window renders black in Feral Games in current Mesa trunk</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101876">Bug 101876</a> - SIGSEGV when launching Steam</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101910">Bug 101910</a> - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101925">Bug 101925</a> - playstore/webview crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101961">Bug 101961</a> - Serious Sam Fusion hangs system completely</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101982">Bug 101982</a> - Weston crashes when running an OpenGL program on i965</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101983">Bug 101983</a> - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102024">Bug 102024</a> - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102148">Bug 102148</a> - Crash when running qopenglwidget example on mesa llvmpipe win32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102241">Bug 102241</a> - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102308">Bug 102308</a> - segfault in glCompressedTextureSubImage3D</li>
+
 </ul>

+
 <h2>Changes</h2>

 <ul>
--- a/docs/relnotes/17.2.1.html
+++ b/docs/relnotes/17.2.1.html
@@ -0,0 +1,200 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.1 Release Notes / September 17, 2017</h1>
+
+<p>
+Mesa 17.2.1 is a bug fix release which fixes bugs found since the 17.2.0 release.
+</p>
+<p>
+Mesa 17.2.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c902d8dc2540195bc570d88af1a8fd8a1774373660a27bb1d539551f46824bc1  mesa-17.2.1.tar.gz
+77385d17827cff24a3bae134342234f2efe7f7f990e778109682571dbbc9ba1e  mesa-17.2.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100613">Bug 100613</a> - Regression in Mesa 17 on s390x (zSystems)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101709">Bug 101709</a> - [llvmpipe] piglit gl-1.0-scissor-offscreen regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102454">Bug 102454</a> - glibc 2.26 doesn't provide anymore xlocale.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102467">Bug 102467</a> - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102502">Bug 102502</a> - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bas Nieuwenhuizen (4):</p>
+<ul>
+  <li>radv: Actually set the cmd_buffer usage_flags.</li>
+  <li>radv: Fix vkCopyImage with both depth and stencil aspects.</li>
+  <li>radv: Disable multilayer &amp; multilevel DCC.</li>
+  <li>radv: Don't allocate CMASK for linear images.</li>
+</ul>
+
+<p>Ben Crocker (1):</p>
+<ul>
+  <li>llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>llvmpipe: initialize llvmpipe-&gt;dirty with LP_NEW_SCISSOR</li>
+</ul>
+
+<p>Charmaine Lee (1):</p>
+<ul>
+  <li>vbo: fix offset in minmax cache key</li>
+</ul>
+
+<p>Dave Airlie (12):</p>
+<ul>
+  <li>radv: disable 1d/2d linear optimisation on gfx9.</li>
+  <li>radv/gfx9: set descriptor up for base_mip to level range.</li>
+  <li>Revert "radv: disable support for VEGA for now."</li>
+  <li>radv/winsys: use amdgpu_bo_va_op_raw.</li>
+  <li>radv/gfx9: allocate events from uncached VA space</li>
+  <li>radv: use simpler indirect packet 3 if possible.</li>
+  <li>radv: don't use iview for meta image width/height.</li>
+  <li>radv: handle GFX9 1D textures</li>
+  <li>radv/gfx9: set mip0-depth correctly for 2d arrays/3d images</li>
+  <li>radv/ac: bump params array for image atomic comp swap</li>
+  <li>radv/gfx9: fix image resource handling.</li>
+  <li>radv/winsys: fix flags vs va_flags thinko.</li>
+</ul>
+
+<p>Emil Velikov (7):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.0</li>
+  <li>cherry-ignore: add getCapability patches</li>
+  <li>cherry-ignore: ignore gfx9 tile swizzle fix</li>
+  <li>cherry-ignore: add execution_type() fix to the list</li>
+  <li>cherry-ignore: add EGL+gbm swast patches</li>
+  <li>egl/x11/dri3: adding missing __DRI_BACKGROUND_CALLABLE extension</li>
+  <li>Update version to 17.2.1</li>
+</ul>
+
+<p>Eric Engestrom (3):</p>
+<ul>
+  <li>util: improve compiler guard</li>
+  <li>mesa/st: remove unwanted backup file</li>
+  <li>docs/egl: remove reference to EGL_DRIVERS_PATH</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>radv: don't assert on empty hash table</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>anv/formats: Nicely handle unknown VkFormat enums</li>
+  <li>spirv: Add support for the HelperInvocation builtin</li>
+</ul>
+
+<p>Karol Herbst (1):</p>
+<ul>
+  <li>nvc0: write 0 to pipeline_statistics.cs_invocations</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Fix crash in fallback GTT mapping.</li>
+  <li>i965: Set "Subslice Hashing Mode" to 16x16 on Apollolake.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: skip draw calls with pipe_draw_info::count == 0</li>
+</ul>
+
+<p>Michael Olbrich (1):</p>
+<ul>
+  <li>egl/dri2: only destroy created objects</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>radeonsi: apply a mask to gl_SampleMaskIn in the PS prolog</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi/gfx9: always flush DB metadata on framebuffer changes</li>
+  <li>st/glsl_to_tgsi: only the first (inner-most) array reference can be a 2D index</li>
+  <li>ac/surface: match Z and stencil tile config</li>
+  <li>glsl: fix glsl_struct_field size calculations for shader cache</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gallivm: correct channel shift logic on big endian</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: skip batch-cache for compute shaders</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>st/mesa: fix view template initialization in try_pbo_readpixels</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radeonsi: update dirty_level_mask before dispatching</li>
+</ul>
+
+<p>Timothy Arceri (9):</p>
+<ul>
+  <li>glsl: allow NULL to be passed to encode_type_to_blob()</li>
+  <li>glsl: stop adding pointers from gl_shader_variable to the cache</li>
+  <li>glsl: stop adding pointers from glsl_struct_field to the cache</li>
+  <li>glsl: add has_uniform_storage() helper to shader cache</li>
+  <li>glsl: don't write uniform storage offset if there isn't one</li>
+  <li>glsl: always write a name/label string to the cache</li>
+  <li>compiler: move pointers to the start of shader_info</li>
+  <li>glsl: stop adding pointers from shader_info to the cache</li>
+  <li>glsl: stop adding pointers from bindless structs to the cache</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.2.html
+++ b/docs/relnotes/17.2.2.html
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.2 Release Notes / October 2, 2017</h1>
+
+<p>
+Mesa 17.2.2 is a bug fix release which fixes bugs found since the 17.2.1 release.
+</p>
+<p>
+Mesa 17.2.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+8242256f3243ed3f35184ed7bf0a9070439ccdf477a3bd9cfd2437c0b2f9bc7f  mesa-17.2.2.tar.gz
+cf522244d6a5a1ecde3fc00e7c96935253fe22f808f064cab98be6f3faa65782  mesa-17.2.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102573">Bug 102573</a> - fails to build on armel</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102844">Bug 102844</a> - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102847">Bug 102847</a> - swr fail to build with llvm-5.0.0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102904">Bug 102904</a> - piglit and gl45 cts linker tests regressed</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alexandru-Liviu Prodea (1):</p>
+<ul>
+  <li>Scons: Add LLVM 5.0 support</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Check for GFX9 for 1D arrays in image_size intrinsic.</li>
+</ul>
+
+<p>Boris Brezillon (1):</p>
+<ul>
+  <li>broadcom/vc4: Fix infinite retry in vc4_bo_alloc()</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>radv/nir: call opt_remove_phis after trivial continues.</li>
+  <li>ac/surface: handle S8 on gfx9</li>
+  <li>st/glsl-&gt;tgsi: fix u64 to bool comparisons.</li>
+</ul>
+
+<p>David Airlie (1):</p>
+<ul>
+  <li>radv: add gfx9 scissor workaround</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.1</li>
+  <li>automake: enable libunwind in `make distcheck'</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>broadcom/vc4: Fix use-after-free for flushing when writing to a texture.</li>
+  <li>broadcom/vc4: Fix use-after-free trying to mix a quad and tile clear.</li>
+  <li>broadcom/vc4: Fix use-after-free when deleting a program.</li>
+  <li>broadcom/vc4: Keep pipe_sampler_view-&gt;texture matching the original texture.</li>
+</ul>
+
+<p>Gert Wollny (2):</p>
+<ul>
+  <li>travis: force llvm-3.3 for "make Gallium ST Other"</li>
+  <li>travis: Add libunwind-dev to gallium/make builds</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>configure: check if -latomic is needed for __atomic_*</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>nv20: Fix GL_CLAMP</li>
+</ul>
+
+<p>Jason Ekstrand (6):</p>
+<ul>
+  <li>i965/blorp: Set r8stencil_needs_update when writing stencil</li>
+  <li>vulkan/wsi/wayland: Stop printing out the DRM device</li>
+  <li>vulkan/wsi/wayland: Refactor wsi_wl_display code</li>
+  <li>vulkan/wsi/wayland: Stop caching Wayland displays</li>
+  <li>vulkan/wsi/wayland: Copy wl_proxy objects from oldSwapchain if available</li>
+  <li>vulkan/wsi/wayland: Return better error messages</li>
+</ul>
+
+<p>Juan A. Suarez Romero (4):</p>
+<ul>
+  <li>cherry-ignore: add "radeonsi/gfx9: proper workaround for LS/HS VGPR initialization bug"</li>
+  <li>cherry-ignore: add "radv: Check for GFX9 for 1D arrays in image_size intrinsic."</li>
+  <li>cherry-ignore: add "radv: copy the number of viewports/scissors at pipeline bind time"</li>
+  <li>Update version to 17.2.2</li>
+</ul>
+
+<p>Józef Kucia (1):</p>
+<ul>
+  <li>anv: Fix descriptors copying</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965/vec4: Actually handle atomic op intrinsics.</li>
+  <li>i965/vec4: Fix swizzles on atomic sources.</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>st/va/postproc: use video original size for postprocessing</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>etnaviv: fix 16bpp clears</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>util: Link libmesautil into u_atomic_test</li>
+  <li>util/u_atomic: Add implementation of __sync_val_compare_and_swap_8</li>
+</ul>
+
+<p>Nicolai Hähnle (9):</p>
+<ul>
+  <li>radeonsi: workaround for gather4 on integer cube maps</li>
+  <li>amd/common: round cube array slice in ac_prepare_cube_coords</li>
+  <li>amd/common: add workaround for cube map array layer clamping</li>
+  <li>glsl/linker: fix output variable overlap check</li>
+  <li>radeonsi: fix array textures layer coordinate</li>
+  <li>radeonsi: set MIP_POINT_PRECLAMP to 0</li>
+  <li>amd/addrlib: fix missing va_end() after va_copy()</li>
+  <li>amd/common: move ac_build_phi from radeonsi</li>
+  <li>radeonsi: fix a regression in integer cube map handling</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (1):</p>
+<ul>
+  <li>anv: fix viewport transformation for z component</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: fix saved compute state when doing statistics/occlusion queries</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: free current ComputeProgram state in _mesa_free_context_data</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>swr/rast: remove llvm fence/atomics from generated files</li>
+</ul>
+
+<p>Tomasz Figa (1):</p>
+<ul>
+  <li>egl/dri2: Implement swapInterval fallback in a conformant way</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.3.html
+++ b/docs/relnotes/17.2.3.html
@@ -0,0 +1,181 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.3 Release Notes / October 19, 2017</h1>
+
+<p>
+Mesa 17.2.3 is a bug fix release which fixes bugs found since the 17.2.2 release.
+</p>
+<p>
+Mesa 17.2.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+fb305eecfeec1fd771fdc96fff973c51871f7bd35fd2bd56cacc27b4b8823220  mesa-17.2.3.tar.gz
+a0b0ec8f7b24dd044d7ab30a8c7e6d3767521e245f88d4ed5dd93315dc56f837  mesa-17.2.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101832">Bug 101832</a> - [PATCH][regression][bisect] Xorg fails to start after f50aa21456d82c8cb6fbaa565835f1acc1720a5d</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102852">Bug 102852</a> - Scons: Support the new Scons 3.0.0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102940">Bug 102940</a> - Regression: Vulkan KMS rendering crashes since 17.2</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Smith (1):</p>
+<ul>
+  <li>radv: Add R16G16B16A16_SNORM fast clear support</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>nir/spirv: Allow loop breaks in a switch body.</li>
+  <li>radv: Only set the MTYPE flags on GFX9+.</li>
+</ul>
+
+<p>Ben Crocker (4):</p>
+<ul>
+  <li>gallivm: fix typo in debug_printf message</li>
+  <li>gallivm: allow additional llc options</li>
+  <li>gallivm/ppc64le: adjust VSX code generation control.</li>
+  <li>gallivm/ppc64le: allow environmental control of Altivec code generation</li>
+</ul>
+
+<p>Daniel Stone (2):</p>
+<ul>
+  <li>egl/wayland: Check queryImage return for wl_buffer</li>
+  <li>egl/wayland: Don't use dmabuf with no modifiers</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>radv: emit fmuladd instead of fma to llvm.</li>
+  <li>radv: lower ffma in nir.</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>cherry-ignore: add "anv: Remove unreachable cases from isl_format_for_size"</li>
+  <li>cherry-ignore: add "anv/wsi: Allocate enough memory for the entire image"</li>
+  <li>swr/rast: do not crash on NULL strings returned by getenv</li>
+  <li>wayland-drm: use a copy of the wayland_drm_callbacks struct</li>
+  <li>eglmesaext: add forward declaration for struct wl_buffers</li>
+  <li>Update version to 17.2.3</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>scons: use python3-compatible print()</li>
+</ul>
+
+<p>Ilia Mirkin (2):</p>
+<ul>
+  <li>nv50/ir: fix 64-bit integer shifts</li>
+  <li>nv50,nvc0: fix push hint logic in presence of a start offset</li>
+</ul>
+
+<p>Jason Ekstrand (6):</p>
+<ul>
+  <li>intel/compiler: Don't cmod propagate into a saturated operation</li>
+  <li>intel/compiler: Don't propagate cmod into integer multiplies</li>
+  <li>glsl/blob: Return false from ensure_can_read on overrun</li>
+  <li>glsl/blob: Return false from grow_to_fit if we've ever failed</li>
+  <li>nir/opcodes: Fix constant-folding of ufind_msb</li>
+  <li>nir: Get rid of the variable on vote intrinsics</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.2</li>
+</ul>
+
+<p>Józef Kucia (3):</p>
+<ul>
+  <li>anv: Fix vkCmdFillBuffer()</li>
+  <li>spirv: Fix SpvOpAtomicISub</li>
+  <li>anv: Do not assert() on VK_ATTACHMENT_UNUSED</li>
+</ul>
+
+<p>Leo Liu (3):</p>
+<ul>
+  <li>st/va: use pipe transfer_map to map upload buffer</li>
+  <li>st/vdpau: don't re-allocate interlaced buffer with packed YUV format</li>
+  <li>st/va: don't re-allocate interlaced buffer with pakced format</li>
+</ul>
+
+<p>Lionel Landwerlin (4):</p>
+<ul>
+  <li>intel: compiler: vec4: add missing default 0 lod</li>
+  <li>anv/cmd_buffer: fix push descriptors with set &gt; 0</li>
+  <li>anv/cmd_buffer: Reset state in cmd_buffer_destroy</li>
+  <li>anv: bo_cache: allow importing a BO larger than needed</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>mesa: fix texture updates for ATI_fragment_shader</li>
+  <li>st/mesa: don't use pipe_surface for passing information about EGLImage</li>
+  <li>glsl_to_tgsi: fix instruction order for bindless textures</li>
+</ul>
+
+<p>Nicolai Hähnle (14):</p>
+<ul>
+  <li>st/glsl_to_tgsi: fix conditional assignments to packed shader outputs</li>
+  <li>amd/common: fix build_cube_select</li>
+  <li>radeonsi/gfx9: fix geometry shaders without output vertices</li>
+  <li>util/queue: fix a race condition in the fence code</li>
+  <li>glsl/lower_instruction: handle denorms and overflow in ldexp correctly</li>
+  <li>radeonsi: move current_rast_prim to r600_common_context</li>
+  <li>radeonsi: don't discard points and lines</li>
+  <li>radeonsi: deduce rast_prim correctly for tessellation point mode</li>
+  <li>radeonsi: fix maximum advertised point size / line width</li>
+  <li>st/mesa: don't clobber glGetInternalformat* buffer for GL_NUM_SAMPLE_COUNTS</li>
+  <li>st/glsl_to_tgsi: fix indirect access to 64-bit integer</li>
+  <li>st/glsl_to_tgsi: fix a use-after-free in merge_two_dsts</li>
+  <li>radeonsi: clamp depth comparison value only for fixed point formats</li>
+  <li>radeonsi: clamp border colors for upgraded depth textures</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>freedreno/a5xx: align height to GMEM</li>
+  <li>freedreno/a5xx: fix missing restore state</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.4.html
+++ b/docs/relnotes/17.2.4.html
@@ -0,0 +1,132 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.4 Release Notes / October 30, 2017</h1>
+
+<p>
+Mesa 17.2.4 is a bug fix release which fixes bugs found since the 17.2.3 release.
+</p>
+<p>
+Mesa 17.2.4 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+cb266edc5cf7226219ebaf556ca2e03dff282e0324d20afd80423a5754d1272c  mesa-17.2.4.tar.gz
+5ba408fecd6e1132e5490eec1a2f04466214e4c65c8b89b331be844768c2e550  mesa-17.2.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102774">Bug 102774</a> - [BDW] [Bisected] Absolute constant buffers break VAAPI in mpv</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103388">Bug 103388</a> - Linking libcltgsi.la (llvm/codegen/libclllvm_la-common.lo) fails with &quot;error: no match for 'operator-'&quot; with GCC-7, Mesa from Git and current LLVM revisions</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+<p>Andres Gomez (8):</p>
+<ul>
+  <li>cherry-ignore: configure.ac: rework llvm detection and handling</li>
+  <li>cherry-ignore: glsl: fix derived cs variables</li>
+  <li>cherry-ignore: added 17.3 nominations.</li>
+  <li>cherry-ignore: radv: Don't use vgpr indexing for outputs on GFX9.</li>
+  <li>cherry-ignore: radv: Disallow indirect outputs for GS on GFX9 as well.</li>
+  <li>cherry-ignore: mesa/bufferobj: don't double negate the range</li>
+  <li>cherry-ignore: broadcom/vc5: Propagate vc4 aliasing fix to vc5.</li>
+  <li>Update version to 17.2.4</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>ac/nir: Fix nir_texop_lod on GFX for 1D arrays.</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>radv/image: bump all the offset to uint64_t.</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.3</li>
+</ul>
+
+<p>Henri Verbeet (1):</p>
+<ul>
+  <li>vulkan/wsi: Free the event in x11_manage_fifo_queues().</li>
+</ul>
+
+<p>Jan Vesely (1):</p>
+<ul>
+  <li>clover: Fix compilation after clang r315871</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>nir/intrinsics: Set the correct num_indices for load_output</li>
+  <li>intel/fs: Handle flag read/write aliasing in needs_src_copy</li>
+  <li>anv/pipeline: Call nir_lower_system_valaues after brw_preprocess_nir</li>
+  <li>intel/eu: Use EXECUTE_1 for JMPI</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Revert absolute mode for constant buffer pointers.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>Revert "mesa: fix texture updates for ATI_fragment_shader"</li>
+</ul>
+
+<p>Matthew Nicholls (1):</p>
+<ul>
+  <li>ac/nir: generate correct instruction for atomic min/max on unsigned images</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>st/mesa: Initialize textures array in st_framebuffer_validate</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: add the draw count buffer to the list of buffers</li>
+</ul>
+
+<p>Stefan Schake (1):</p>
+<ul>
+  <li>broadcom/vc4: Fix aliasing issue</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.5.html
+++ b/docs/relnotes/17.2.5.html
@@ -0,0 +1,156 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.5 Release Notes / November 10, 2017</h1>
+
+<p>
+Mesa 17.2.5 is a bug fix release which fixes bugs found since the 17.2.4 release.
+</p>
+<p>
+Mesa 17.2.5 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+25b40e72fad64b096c2d8d6fe9579369954debe7970d4ad53e5033c7eec2918b  mesa-17.2.5.tar.gz
+7f7f914b7b9ea0b15f2d9d01a4375e311b0e90e55683b8e8a67ce8691eb1070f  mesa-17.2.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97532">Bug 97532</a> - Regression: GLB 2.7 &amp; Glmark-2 GLES versions segfault due to linker precision error (259fc505) on dead variable</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102680">Bug 102680</a> - [OpenGL CTS] KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102809">Bug 102809</a> - Rust shadows(?) flash random colours</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103142">Bug 103142</a> - R600g+sb: optimizer apparently stuck in an endless loop</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+<p>Andres Gomez (8):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.4</li>
+  <li>cherry-ignore: radv: copy indirect lowering settings from radeonsi</li>
+  <li>cherry-ignore: i965: fix blorp stage_prog_data-&gt;param leak</li>
+  <li>cherry-ignore: etnaviv: don't do resolve-in-place without valid TS</li>
+  <li>cherry-ignore: intel/fs: Alloc pull constants off mem_ctx</li>
+  <li>cherry-ignore: added 17.3 nominations.</li>
+  <li>cherry-ignore: automake: include git_sha1.h.in in release tarball</li>
+  <li>Update version to 17.2.5</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>radv: Don't expose heaps with 0 memory.</li>
+  <li>radv: Don't use vgpr indexing for outputs on GFX9.</li>
+  <li>radv: Disallow indirect outputs for GS on GFX9 as well.</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>i915g: make gears run again.</li>
+  <li>radv: free attachments on end command buffer.</li>
+  <li>radv: add initial copy descriptor support. (v2)</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>vc4: fix release build</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>r600/sb: bail out if prepare_alu_group() doesn't find a proper scheduling</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>spirv: Claim support for the simple memory model</li>
+  <li>i965/blorp: Use blorp_to_isl_format for src_isl_format in blit_miptrees</li>
+  <li>i965/blorp: Use more temporary isl_format variables</li>
+  <li>i965/miptree: Take an isl_format in render_aux_usage</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>mesa: Accept GL_BACK in get_fb0_attachment with ARB_ES3_1_compatibility.</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/video: add gfx9 offsets when rejoin the video surface</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>st/dri: don't expose modifiers in EGL if the driver doesn't implement them</li>
+  <li>ac/surface/gfx9: don't allow DCC for the smallest mipmap levels</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>i965: Check CCS_E compatibility for texture view rendering</li>
+</ul>
+
+<p>Neil Roberts (1):</p>
+<ul>
+  <li>nir/opt_intrinsics: Fix values for gl_SubGroupG{e,t}MaskARB</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>amd/common/gfx9: workaround DCC corruption more conservatively</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>i965: unref push_const_bo in intelDestroyContext</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>radv: copy indirect lowering settings from radeonsi</li>
+</ul>
+
+<p>Tomasz Figa (1):</p>
+<ul>
+  <li>glsl: Allow precision mismatch on dead data with GLSL ES 1.00</li>
+</ul>
+
+<p>Topi Pohjolainen (1):</p>
+<ul>
+  <li>intel/compiler/gen9: Pixel shader header only workaround</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.6.html
+++ b/docs/relnotes/17.2.6.html
@@ -0,0 +1,187 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.6 Release Notes / November 25, 2017</h1>
+
+<p>
+Mesa 17.2.6 is a bug fix release which fixes bugs found since the 17.2.5 release.
+</p>
+<p>
+Mesa 17.2.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+a9ed76702ffb14ad674ad48899f5c8c7e3a0f987911878a5dfdc4117dce5b415  mesa-17.2.6.tar.gz
+6ad85224620330be26ab68c8fc78381b12b38b610ade2db8716b38faaa8f30de  mesa-17.2.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100438">Bug 100438</a> - glsl/ir.cpp:1376: ir_dereference_variable::ir_dereference_variable(ir_variable*): Assertion `var != NULL' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102177">Bug 102177</a> - [SKL] ES31-CTS.core.sepshaderobjs.StateInteraction fails sporadically</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103115">Bug 103115</a> - [BSW BXT GLK] dEQP-VK.spirv_assembly.instruction.compute.sconvert.int32_to_int64</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103519">Bug 103519</a> - wayland egl apps crash on start with mesa 17.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103529">Bug 103529</a> - [GM45] GPU hang with mpv fullscreen (bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103628">Bug 103628</a> - [BXT, GLK, BSW] KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103787">Bug 103787</a> - [BDW,BSW] gpu hang on spec.arb_pipeline_statistics_query.arb_pipeline_statistics_query-comp</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/drisw: Fix glXMakeCurrent(dpy, None, ctx)</li>
+  <li>glx/dri3: Fix passing renderType into glXCreateContext</li>
+</ul>
+
+<p>Alex Smith (2):</p>
+<ul>
+  <li>spirv: Use correct type for sampled images</li>
+  <li>nir/spirv: tg4 requires a sampler</li>
+</ul>
+
+<p>Andres Gomez (14):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.5</li>
+  <li>cherry-ignore: intel/fs: Use a pure vertical stride for large register strides</li>
+  <li>cherry-ignore: intel/nir: Use the correct indirect lowering masks in link_shaders</li>
+  <li>cherry-ignore: intel/fs: Use the original destination region for int MUL lowering</li>
+  <li>cherry-ignore: intel/fs: refactors</li>
+  <li>cherry-ignore: r600/shader: reserve first register of vertex shader.</li>
+  <li>cherry-ignore: anv/cmd_buffer: Advance the address when initializing clear colors</li>
+  <li>cherry-ignore: anv/cmd_buffer: Take bo_offset into account in fast clear state addresses</li>
+  <li>cherry-ignore: i965: Mark BOs as external when we export their handle</li>
+  <li>cherry-ignore: added 17.3 nominations.</li>
+  <li>cherry-ignore: glsl: Fix typo fragement -&gt; fragment</li>
+  <li>cherry-ignore: egl: pass the dri2_dpy to the $plat_teardown functions</li>
+  <li>cherry-ignore: Revert "intel/fs: Use a pure vertical stride for large register strides"</li>
+  <li>Update version to 17.2.6</li>
+</ul>
+
+<p>Anuj Phogat (2):</p>
+<ul>
+  <li>i965: Program DWord Length in MI_FLUSH_DW</li>
+  <li>i965/gen8+: Fix the number of dwords programmed in MI_FLUSH_DW</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>radv: Free syncobj with multiple imports.</li>
+  <li>radv: Free temporary syncobj after waiting on it.</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>r600: fix isoline tess factor component swapping.</li>
+</ul>
+
+<p>Derek Foreman (1):</p>
+<ul>
+  <li>egl/wayland: Add a fallback when fourcc query isn't supported</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>autotools: Set C++ visibility flags on Intel</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>targets/opencl: don't hardcode the icd file install to /etc/...</li>
+  <li>configure.ac: loosen --enable-glvnd check to honour egl</li>
+  <li>configure.ac: require xcb* for the omx/va/... when using x11 platform</li>
+</ul>
+
+<p>George Barrett (1):</p>
+<ul>
+  <li>glsl: Catch subscripted calls to undeclared subroutines</li>
+</ul>
+
+<p>Jason Ekstrand (9):</p>
+<ul>
+  <li>intel/fs: Use ANY/ALL32 predicates in SIMD32</li>
+  <li>intel/fs: Use an explicit D type for vote any/all/eq intrinsics</li>
+  <li>intel/fs: Use a pair of 1-wide MOVs instead of SEL for any/all</li>
+  <li>intel/eu/reg: Add a subscript() helper</li>
+  <li>intel/fs: Fix MOV_INDIRECT for 64-bit values on little-core</li>
+  <li>intel/fs: Fix integer multiplication lowering for src/dst hazards</li>
+  <li>intel/fs: Mark 64-bit values as being contiguous</li>
+  <li>intel/fs: Rework zero-length URB write handling</li>
+  <li>i965: Add stencil buffers to cache set regardless of stencil texturing</li>
+</ul>
+
+<p>Kenneth Graunke (5):</p>
+<ul>
+  <li>i965: properly initialize brw-&gt;cs.base.stage to MESA_SHADER_COMPUTE</li>
+  <li>i965: Make L3 configuration atom listen for TCS/TES program updates.</li>
+  <li>intel/tools: Fix detection of enabled shader stages.</li>
+  <li>i965: Implement another VF cache invalidate workaround on Gen8+.</li>
+  <li>i965: Upload invariant state once at the start of the batch on Gen4-5.</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>i965/fs: Fix extract_i8/u8 to a 64-bit destination</li>
+  <li>i965/fs: Split all 32-&gt;64-bit MOVs on CHV, BXT, GLK</li>
+</ul>
+
+<p>Neil Roberts (1):</p>
+<ul>
+  <li>glsl: Transform fb buffers are only active if a variable uses them</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>ddebug: fix use-after-free of streamout targets</li>
+</ul>
+
+<p>Tim Rowley (2):</p>
+<ul>
+  <li>swr/rast: Use gather instruction for i32gather_ps on simd16/avx512</li>
+  <li>swr/rast: Faster emulated simd16 permute</li>
+</ul>
+
+<p>Timothy Arceri (3):</p>
+<ul>
+  <li>glsl: drop cache_fallback</li>
+  <li>glsl: use the correct parent when allocating program data members</li>
+  <li>mesa: rework how we free gl_shader_program_data</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.7.html
+++ b/docs/relnotes/17.2.7.html
@@ -0,0 +1,247 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.7 Release Notes / December 14, 2017</h1>
+
+<p>
+Mesa 17.2.7 is a bug fix release which fixes bugs found since the 17.2.6 release.
+</p>
+<p>
+Mesa 17.2.7 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+e8d837a1cd55014e636e9caf6c75cfbe1b3e4be9ab3fa125f5ef38398aa12e97  mesa-17.2.7.tar.gz
+50cfdea8df55045797b4d0409591c04c784d9551c4da09b8178874dbe5a37a68  mesa-17.2.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94739">Bug 94739</a> - Mesa 11.1.2 implementation error: bad format MESA_FORMAT_Z_FLOAT32 in _mesa_unpack_uint_24_8_depth_stencil_row</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101378">Bug 101378</a> - interpolateAtSample check for input parameter is too strict</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102006">Bug 102006</a> - gstreamer vaapih264enc segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102435">Bug 102435</a> - [skl,kbl] [drm] GPU HANG: ecode 9:0:0x86df7cf9, in csgo_linux64 [4947], reason: Hang on rcs, action: reset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102552">Bug 102552</a> - Null dereference due to not checking return value of util_format_description</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102677">Bug 102677</a> - [OpenGL CTS] KHR-GL45.CommonBugs.CommonBug_PerVertexValidation fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103098">Bug 103098</a> - [OpenGL CTS] KHR-GL45.enhanced_layouts.varying_structure_locations fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103227">Bug 103227</a> - [G965 G45 ILK] ES2-CTS.gtf.GL2ExtensionTests.texture_float.texture_float regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103393">Bug 103393</a> - glDispatchComputeGroupSizeARB : gl_GlobalInvocationID.x != gl_WorkGroupID.x * gl_LocalGroupSizeARB.x + gl_LocalInvocationID.x</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103412">Bug 103412</a> - gallium/wgl: Another fix to context creation without prior SetPixelFormat()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103616">Bug 103616</a> - Increased difference from reference image in shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103626">Bug 103626</a> - [SNB] ES3-CTS.functional.shaders.precision</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103732">Bug 103732</a> - [swr] often gets stuck in piglit's glx-multi-context-single-window test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103909">Bug 103909</a> - anv_allocator.c:113:1: error: static declaration of ‘memfd_create’ follows non-static declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103966">Bug 103966</a> - Mesa 17.2.5 implementation error: bad format MESA_FORMAT_Z_FLOAT32 in _mesa_unpack_uint_24_8_depth_stencil_row</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104119">Bug 104119</a> - radv: OpBitFieldInsert produces 0 with a loop counter for Insert</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104143">Bug 104143</a> - r600/sb: clobbers gl_Position -&gt; gl_FragCoord</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Smith (1):</p>
+<ul>
+  <li>radv: Add LLVM version to the device name string</li>
+</ul>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.6</li>
+  <li>docs: remove bug 103626 from fix list as per 17.2.6</li>
+</ul>
+
+<p>Ben Crocker (2):</p>
+<ul>
+  <li>docs/llvmpipe.html: Minor edits</li>
+  <li>docs/llvmpipe: document ppc64le as alternative architecture to x86.</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>r600/sb: handle jump after target to end of program. (v2)</li>
+</ul>
+
+<p>Denis Pauk (1):</p>
+<ul>
+  <li>gallium/{r600, radeonsi}: Fix segfault with color format (v2)</li>
+</ul>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+  <li>glsl_parser_extra: Add utility to copy symbols between symbol tables</li>
+  <li>glsl: Use the utility function to copy symbols between symbol tables</li>
+  <li>glsl/linker: Check that re-declared, inter-shader built-in blocks match</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>gl_table.py: add extern C guard for the generated glapitable.h</li>
+  <li>cherry-ignore: radeonsi: allow DMABUF exports for local buffers</li>
+  <li>Update version to 17.2.7</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>broadcom/vc4: Fix handling of GFXH-515 workaround with a start vertex count.</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>compiler: use NDEBUG to guard asserts</li>
+</ul>
+
+<p>Fabian Bieler (2):</p>
+<ul>
+  <li>glsl: Match order of gl_LightSourceParameters elements.</li>
+  <li>glsl: Fix gl_NormalScale.</li>
+</ul>
+
+<p>Frank Richter (1):</p>
+<ul>
+  <li>gallium/wgl: fix default pixel format issue</li>
+</ul>
+
+<p>George Kyriazis (1):</p>
+<ul>
+  <li>swr: Handle resource across context changes</li>
+</ul>
+
+<p>Gert Wollny (2):</p>
+<ul>
+  <li>r600: Emit EOP for more CF instruction types</li>
+  <li>r600/sb: do not convert if-blocks that contain indirect array access</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>glsl: fix derived cs variables</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>nir/opcodes: Fix constant-folding of bitfield_insert</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965: Disable regular fast-clears (CCS_D) on gen9+</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>glsl: add varying resources for arrays of complex types</li>
+</ul>
+
+<p>Julien Isorce (1):</p>
+<ul>
+  <li>st/va: change frame_idx from array to hash table</li>
+</ul>
+
+<p>Kai Wasserbäch (1):</p>
+<ul>
+  <li>docs: Point to apt.llvm.org for development snapshot packages</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>meta: Initialize depth/clear values on declaration.</li>
+  <li>meta: Fix ClearTexture with GL_DEPTH_COMPONENT.</li>
+  <li>i965: Fix Smooth Point Enables.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>radeonsi: fix layered DCC fast clear</li>
+  <li>radeonsi/gfx9: fix importing shared textures with DCC</li>
+  <li>radeonsi: flush the context after resource_copy_region for buffer exports</li>
+</ul>
+
+<p>Matt Turner (4):</p>
+<ul>
+  <li>i965/fs: Handle negating immediates on MADs when propagating saturates</li>
+  <li>util: Fix SHA1 implementation on big endian</li>
+  <li>util: Fix disk_cache index calculation on big endian</li>
+  <li>i965/fs: Unpack count argument to 64-bit shift ops on Atom</li>
+</ul>
+
+<p>Nicolai Hähnle (3):</p>
+<ul>
+  <li>radeonsi: fix the R600_RESOURCE_FLAG_UNMAPPABLE check</li>
+  <li>glsl: allow any l-value of an input variable as interpolant in interpolateAt*</li>
+  <li>glsl: fix interpolateAtXxx(some_vec[idx], ...) with dynamic idx</li>
+</ul>
+
+<p>Pierre Moreau (1):</p>
+<ul>
+  <li>nvc0/ir: Properly lower 64-bit shifts when the shift value is &gt;32</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa/gles: adjust internal format in glTexSubImage2D error checks</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>glsl: get correct member type when processing xfb ifc arrays</li>
+</ul>
+
+<p>Vadym Shovkoplias (2):</p>
+<ul>
+  <li>intel/blorp: Fix possible NULL pointer dereferencing</li>
+  <li>glx/dri3: Remove unused deviceName variable</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>anv: Check if memfd_create is already defined.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.8.html
+++ b/docs/relnotes/17.2.8.html
@@ -0,0 +1,112 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.8 Release Notes / December 22, 2017</h1>
+
+<p>
+Mesa 17.2.8 is a bug fix release which fixes bugs found since the 17.2.7 release.
+</p>
+<p>
+Mesa 17.2.8 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c715c3a3d6fe26a69c096f573ec416e038a548f0405e3befedd5136517527a84  mesa-17.2.8.tar.gz
+6e940345cceaadfd805d701ed2b956589fa77fe8c39991da30ed51ea6b9d095f  mesa-17.2.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102710">Bug 102710</a> - vkCmdBlitImage with arrayLayers &gt; 1 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103007">Bug 103007</a> - [OpenGL CTS] [HSW] KHR-GL45.gpu_shader_fp64.fp64.max_uniform_components fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103544">Bug 103544</a> - Graphical glitches r600 in game this war of mine linux native</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103579">Bug 103579</a> - Vertex shader causes compiler to crash in SPIRV-to-NIR</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (6):</p>
+<ul>
+  <li>cherry-ignore: swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.</li>
+  <li>cherry-ignore: added 17.3 nominations.</li>
+  <li>cherry-ignore: radv: port merge tess info from anv</li>
+  <li>cherry-ignore: main: Clear shader program data whenever ProgramBinary is called</li>
+  <li>cherry-ignore: r600: set DX10_CLAMP for compute shader too</li>
+  <li>Update version to 17.2.8</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>spirv: Fix loading an entire block at once.</li>
+  <li>radv: Fix multi-layer blits.</li>
+</ul>
+
+<p>Brian Paul (2):</p>
+<ul>
+  <li>xlib: call _mesa_warning() instead of fprintf()</li>
+  <li>gallium/aux: include nr_samples in util_resource_size() computation</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.7</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>i965/vec4: use a temp register to compute offsets for pull loads</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: move destroy command before feedback command</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>util: Assume little endian in the absence of platform-specific handling</li>
+  <li>util: Add a SHA1 unit test program</li>
+</ul>
+
+<p>Roland Scheidegger (2):</p>
+<ul>
+  <li>r600: use min_dx10/max_dx10 instead of min/max</li>
+  <li>r600: use DX10_CLAMP bit in shader setup</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/EGL/egl.h
+++ b/include/EGL/egl.h
@@ -31,14 +31,14 @@ extern "C" {
 ** This header is generated from the Khronos OpenGL / OpenGL ES XML
 ** API Registry. The current version of the Registry, generator scripts
 ** used to make the header, and the header can be found at
-**   http://www.opengl.org/registry/egl
+**   http://www.khronos.org/registry/egl
 **
-** Khronos $Revision$ on $Date$
+** Khronos $Git commit SHA1: a732b061e7 $ on $Git commit date: 2017-06-17 23:27:53 +0100 $
 */

 #include <EGL/eglplatform.h>

-/* Generated on date 20161230 */
+/* Generated on date 20170627 */

 /* Generated C header for:
 * API: egl
--- a/include/EGL/eglext.h
+++ b/include/EGL/eglext.h
@@ -31,14 +31,14 @@ extern "C" {
 ** This header is generated from the Khronos OpenGL / OpenGL ES XML
 ** API Registry. The current version of the Registry, generator scripts
 ** used to make the header, and the header can be found at
-**   http://www.opengl.org/registry/egl
+**   http://www.khronos.org/registry/egl
 **
-** Khronos $Revision$ on $Date$
+** Khronos $Git commit SHA1: a732b061e7 $ on $Git commit date: 2017-06-17 23:27:53 +0100 $
 */

 #include <EGL/eglplatform.h>

-#define EGL_EGLEXT_VERSION 20161230
+#define EGL_EGLEXT_VERSION 20170627

 /* Generated C header for:
 * API: egl
@@ -133,6 +133,15 @@ EGLAPI EGLint EGLAPIENTRY eglLabelObjectKHR (EGLDisplay display, EGLenum objectT
 #endif
 #endif /* EGL_KHR_debug */

+#ifndef EGL_KHR_display_reference
+#define EGL_KHR_display_reference 1
+#define EGL_TRACK_REFERENCES_KHR          0x3352
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBKHRPROC) (EGLDisplay dpy, EGLint name, EGLAttrib *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribKHR (EGLDisplay dpy, EGLint name, EGLAttrib *value);
+#endif
+#endif /* EGL_KHR_display_reference */
+
 #ifndef EGL_KHR_fence_sync
 #define EGL_KHR_fence_sync 1
 typedef khronos_utime_nanoseconds_t EGLTimeKHR;
@@ -555,6 +564,11 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSu
 #define EGL_DISCARD_SAMPLES_ARM           0x3286
 #endif /* EGL_ARM_pixmap_multisample_discard */

+#ifndef EGL_EXT_bind_to_front
+#define EGL_EXT_bind_to_front 1
+#define EGL_FRONT_BUFFER_EXT              0x3464
+#endif /* EGL_EXT_bind_to_front */
+
 #ifndef EGL_EXT_buffer_age
 #define EGL_EXT_buffer_age 1
 #define EGL_BUFFER_AGE_EXT                0x313D
@@ -564,6 +578,30 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSu
 #define EGL_EXT_client_extensions 1
 #endif /* EGL_EXT_client_extensions */

+#ifndef EGL_EXT_compositor
+#define EGL_EXT_compositor 1
+#define EGL_PRIMARY_COMPOSITOR_CONTEXT_EXT 0x3460
+#define EGL_EXTERNAL_REF_ID_EXT           0x3461
+#define EGL_COMPOSITOR_DROP_NEWEST_FRAME_EXT 0x3462
+#define EGL_COMPOSITOR_KEEP_NEWEST_FRAME_EXT 0x3463
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETCONTEXTLISTEXTPROC) (const EGLint *external_ref_ids, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETCONTEXTATTRIBUTESEXTPROC) (EGLint external_ref_id, const EGLint *context_attributes, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETWINDOWLISTEXTPROC) (EGLint external_ref_id, const EGLint *external_win_ids, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETWINDOWATTRIBUTESEXTPROC) (EGLint external_win_id, const EGLint *window_attributes, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORBINDTEXWINDOWEXTPROC) (EGLint external_win_id);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETSIZEEXTPROC) (EGLint external_win_id, EGLint width, EGLint height);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSWAPPOLICYEXTPROC) (EGLint external_win_id, EGLint policy);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetContextListEXT (const EGLint *external_ref_ids, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetContextAttributesEXT (EGLint external_ref_id, const EGLint *context_attributes, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetWindowListEXT (EGLint external_ref_id, const EGLint *external_win_ids, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetWindowAttributesEXT (EGLint external_win_id, const EGLint *window_attributes, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorBindTexWindowEXT (EGLint external_win_id);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetSizeEXT (EGLint external_win_id, EGLint width, EGLint height);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSwapPolicyEXT (EGLint external_win_id, EGLint policy);
+#endif
+#endif /* EGL_EXT_compositor */
+
 #ifndef EGL_EXT_create_context_robustness
 #define EGL_EXT_create_context_robustness 1
 #define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF
@@ -618,6 +656,21 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribEXT (EGLDisplay dpy, EGLint a
 #define EGL_GL_COLORSPACE_BT2020_PQ_EXT   0x3340
 #endif /* EGL_EXT_gl_colorspace_bt2020_pq */

+#ifndef EGL_EXT_gl_colorspace_display_p3
+#define EGL_EXT_gl_colorspace_display_p3 1
+#define EGL_GL_COLORSPACE_DISPLAY_P3_EXT  0x3363
+#endif /* EGL_EXT_gl_colorspace_display_p3 */
+
+#ifndef EGL_EXT_gl_colorspace_display_p3_linear
+#define EGL_EXT_gl_colorspace_display_p3_linear 1
+#define EGL_GL_COLORSPACE_DISPLAY_P3_LINEAR_EXT 0x3362
+#endif /* EGL_EXT_gl_colorspace_display_p3_linear */
+
+#ifndef EGL_EXT_gl_colorspace_scrgb
+#define EGL_EXT_gl_colorspace_scrgb 1
+#define EGL_GL_COLORSPACE_SCRGB_EXT       0x3351
+#endif /* EGL_EXT_gl_colorspace_scrgb */
+
 #ifndef EGL_EXT_gl_colorspace_scrgb_linear
 #define EGL_EXT_gl_colorspace_scrgb_linear 1
 #define EGL_GL_COLORSPACE_SCRGB_LINEAR_EXT 0x3350
@@ -670,6 +723,13 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQueryDmaBufModifiersEXT (EGLDisplay dpy, EGLint
 #endif
 #endif /* EGL_EXT_image_dma_buf_import_modifiers */

+#ifndef EGL_EXT_image_implicit_sync_control
+#define EGL_EXT_image_implicit_sync_control 1
+#define EGL_IMPORT_SYNC_TYPE_EXT          0x3470
+#define EGL_IMPORT_IMPLICIT_SYNC_EXT      0x3471
+#define EGL_IMPORT_EXPLICIT_SYNC_EXT      0x3472
+#endif /* EGL_EXT_image_implicit_sync_control */
+
 #ifndef EGL_EXT_multiview_window
 #define EGL_EXT_multiview_window 1
 #define EGL_MULTIVIEW_VIEW_COUNT_EXT      0x3134
@@ -769,6 +829,12 @@ EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStr
 #endif
 #endif /* EGL_EXT_stream_consumer_egloutput */

+#ifndef EGL_EXT_surface_CTA861_3_metadata
+#define EGL_EXT_surface_CTA861_3_metadata 1
+#define EGL_CTA861_3_MAX_CONTENT_LIGHT_LEVEL_EXT 0x3360
+#define EGL_CTA861_3_MAX_FRAME_AVERAGE_LEVEL_EXT 0x3361
+#endif /* EGL_EXT_surface_CTA861_3_metadata */
+
 #ifndef EGL_EXT_surface_SMPTE2086_metadata
 #define EGL_EXT_surface_SMPTE2086_metadata 1
 #define EGL_SMPTE2086_DISPLAY_PRIMARY_RX_EXT 0x3341
@@ -781,6 +847,7 @@ EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStr
 #define EGL_SMPTE2086_WHITE_POINT_Y_EXT   0x3348
 #define EGL_SMPTE2086_MAX_LUMINANCE_EXT   0x3349
 #define EGL_SMPTE2086_MIN_LUMINANCE_EXT   0x334A
+#define EGL_METADATA_SCALING_EXT          50000
 #endif /* EGL_EXT_surface_SMPTE2086_metadata */

 #ifndef EGL_EXT_swap_buffers_with_damage
--- a/include/EGL/eglmesaext.h
+++ b/include/EGL/eglmesaext.h
@@ -70,6 +70,7 @@ typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYWAYLANDBUFFERWL) (EGLDisplay dpy, st
 #ifndef EGL_WL_create_wayland_buffer_from_image
 #define EGL_WL_create_wayland_buffer_from_image 1

+struct wl_buffer;
 #ifdef EGL_EGLEXT_PROTOTYPES
 EGLAPI struct wl_buffer * EGLAPIENTRY eglCreateWaylandBufferFromImageWL(EGLDisplay dpy, EGLImageKHR image);
 #endif
--- a/include/GLES/gl.h
+++ b/include/GLES/gl.h
@@ -50,9 +50,22 @@ extern "C" {

 #ifndef GL_VERSION_ES_CM_1_0
 #define GL_VERSION_ES_CM_1_0 1
+
+/*
+ * XXX: Temporary fix; needs to be reverted as part of the next
+ * header update.
+ * For more details:
+ * https://github.com/KhronosGroup/OpenGL-Registry/pull/76
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-June/161647.html
+ */
+#include <KHR/khrplatform.h>
+typedef khronos_int8_t GLbyte;
+typedef khronos_float_t GLclampf;
+typedef short GLshort;
+typedef unsigned short GLushort;
+
 typedef void GLvoid;
 typedef unsigned int GLenum;
-#include <KHR/khrplatform.h>
 typedef khronos_float_t GLfloat;
 typedef khronos_int32_t GLfixed;
 typedef unsigned int GLuint;
--- a/include/GLES/glext.h
+++ b/include/GLES/glext.h
@@ -104,7 +104,6 @@ GL_API void GL_APIENTRY glBlendEquationOES (GLenum mode);

 #ifndef GL_OES_byte_coordinates
 #define GL_OES_byte_coordinates 1
-typedef khronos_int8_t GLbyte;
 #endif /* GL_OES_byte_coordinates */

 #ifndef GL_OES_compressed_ETC1_RGB8_sub_texture
@@ -128,7 +127,6 @@ typedef khronos_int8_t GLbyte;

 #ifndef GL_OES_draw_texture
 #define GL_OES_draw_texture 1
-typedef short GLshort;
 #define GL_TEXTURE_CROP_RECT_OES          0x8B9D
 typedef void (GL_APIENTRYP PFNGLDRAWTEXSOESPROC) (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height);
 typedef void (GL_APIENTRYP PFNGLDRAWTEXIOESPROC) (GLint x, GLint y, GLint z, GLint width, GLint height);
@@ -409,7 +407,6 @@ GL_API GLbitfield GL_APIENTRY glQueryMatrixxOES (GLfixed *mantissa, GLint *expon

 #ifndef GL_OES_single_precision
 #define GL_OES_single_precision 1
-typedef khronos_float_t GLclampf;
 typedef void (GL_APIENTRYP PFNGLCLEARDEPTHFOESPROC) (GLclampf depth);
 typedef void (GL_APIENTRYP PFNGLCLIPPLANEFOESPROC) (GLenum plane, const GLfloat *equation);
 typedef void (GL_APIENTRYP PFNGLDEPTHRANGEFOESPROC) (GLclampf n, GLclampf f);
--- a/include/KHR/khrplatform.h
+++ b/include/KHR/khrplatform.h
@@ -26,7 +26,7 @@

 /* Khronos platform-specific types and definitions.
 *
- * $Revision: 23298 $ on $Date: 2013-09-30 17:07:13 -0700 (Mon, 30 Sep 2013) $
+ * $Revision: 32517 $ on $Date: 2016-03-11 02:41:19 -0800 (Fri, 11 Mar 2016) $
 *
 * Adopters may modify this file to suit their platform. Adopters are
 * encouraged to submit platform specific modifications to the Khronos
@@ -98,11 +98,7 @@
 * This precedes the return type of the function in the function prototype.
 */
 #if defined(_WIN32) && !defined(__SCITECH_SNAP__)
-#   if defined(KHRONOS_DLL_EXPORTS)
-#      define KHRONOS_APICALL __declspec(dllexport)
-#   else
-#      define KHRONOS_APICALL __declspec(dllimport)
-#   endif
+#   define KHRONOS_APICALL __declspec(dllimport)
 #elif defined (__SYMBIAN32__)
 #   define KHRONOS_APICALL IMPORT_C
 #elif (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303) \
@@ -231,7 +227,7 @@ typedef signed   short int     khronos_int16_t;
 typedef unsigned short int     khronos_uint16_t;

 /*
- * Types that differ between LLP64 and LP64 architectures - in LLP64, 
+ * Types that differ between LLP64 and LP64 architectures - in LLP64,
 * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
 * to be the only LLP64 architecture in current use.
 */
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -157,6 +157,19 @@ def check_header(env, header):
    env = conf.Finish()
    return have_header

+def check_functions(env, functions):
+    '''Return True only if every name in functions passes the CheckFunc test.'''
+
+    configure = SCons.Script.Configure(env)
+
+    # Probe the whole list up front; a failed check does not stop later ones.
+    probes = [bool(configure.CheckFunc(name)) for name in functions]
+    all_found = all(probes)
+
+    # The environment handed back by Finish() is only rebound locally.
+    env = configure.Finish()
+    return all_found
+
 def check_prog(env, prog):
    """Check whether this program exists."""

@@ -244,16 +257,16 @@ def generate(env):
    # Backwards compatability with the debug= profile= options
    if env['build'] == 'debug':
        if not env['debug']:
-            print 'scons: warning: debug option is deprecated and will be removed eventually; use instead'
-            print
-            print ' scons build=release'
-            print
+            print('scons: warning: debug option is deprecated and will be removed eventually; use instead')
+            print('')
+            print(' scons build=release')
+            print('')
            env['build'] = 'release'
        if env['profile']:
-            print 'scons: warning: profile option is deprecated and will be removed eventually; use instead'
-            print
-            print ' scons build=profile'
-            print
+            print('scons: warning: profile option is deprecated and will be removed eventually; use instead')
+            print('')
+            print(' scons build=profile')
+            print('')
            env['build'] = 'profile'
    if False:
        # Enforce SConscripts to use the new build variable
@@ -287,7 +300,7 @@ def generate(env):
    env['build_dir'] = build_dir
    env.SConsignFile(os.path.join(build_dir, '.sconsign'))
    if 'SCONS_CACHE_DIR' in os.environ:
-        print 'scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],)
+        print('scons: Using build cache in %s.' % (os.environ['SCONS_CACHE_DIR'],))
        env.CacheDir(os.environ['SCONS_CACHE_DIR'])
    env['CONFIGUREDIR'] = os.path.join(build_dir, 'conf')
    env['CONFIGURELOG'] = os.path.join(os.path.abspath(build_dir), 'config.log')
@@ -339,6 +352,9 @@ def generate(env):
        if check_header(env, 'xlocale.h'):
            cppdefines += ['HAVE_XLOCALE_H']

+        if check_functions(env, ['strtod_l', 'strtof_l']):
+            cppdefines += ['HAVE_STRTOD_L']
+
    if platform == 'windows':
        cppdefines += [
            'WIN32',
@@ -369,8 +385,8 @@ def generate(env):
    if env['embedded']:
        cppdefines += ['PIPE_SUBSYSTEM_EMBEDDED']
    if env['texture_float']:
-        print 'warning: Floating-point textures enabled.'
-        print 'warning: Please consult docs/patents.txt with your lawyer before building Mesa.'
+        print('warning: Floating-point textures enabled.')
+        print('warning: Please consult docs/patents.txt with your lawyer before building Mesa.')
        cppdefines += ['TEXTURE_FLOAT_ENABLED']
    env.Append(CPPDEFINES = cppdefines)

--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -68,13 +68,13 @@ def generate(env):
    if env['platform'] == 'windows':
        # XXX: There is no llvm-config on Windows, so assume a standard layout
        if llvm_dir is None:
-            print 'scons: LLVM environment variable must be specified when building for windows'
+            print('scons: LLVM environment variable must be specified when building for windows')
            return

        # Try to determine the LLVM version from llvm/Config/config.h
        llvm_config = os.path.join(llvm_dir, 'include/llvm/Config/llvm-config.h')
        if not os.path.exists(llvm_config):
-            print 'scons: could not find %s' % llvm_config
+            print('scons: could not find %s' % llvm_config)
            return
        llvm_version_major_re = re.compile(r'^#define LLVM_VERSION_MAJOR ([0-9]+)')
        llvm_version_minor_re = re.compile(r'^#define LLVM_VERSION_MINOR ([0-9]+)')
@@ -92,10 +92,10 @@ def generate(env):
            llvm_version = distutils.version.LooseVersion('%s.%s' % (llvm_version_major, llvm_version_minor))

        if llvm_version is None:
-            print 'scons: could not determine the LLVM version from %s' % llvm_config
+            print('scons: could not determine the LLVM version from %s' % llvm_config)
            return
        if llvm_version < distutils.version.LooseVersion(required_llvm_version):
-            print 'scons: LLVM version %s found, but %s is required' % (llvm_version, required_llvm_version)
+            print('scons: LLVM version %s found, but %s is required' % (llvm_version, required_llvm_version))
            return

        env.Prepend(CPPPATH = [os.path.join(llvm_dir, 'include')])
@@ -104,7 +104,26 @@ def generate(env):
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
-        if llvm_version >= distutils.version.LooseVersion('4.0'):
+        if llvm_version >= distutils.version.LooseVersion('5.0'):
+            env.Prepend(LIBS = [
+                'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
+                'LLVMDebugInfoCodeView', 'LLVMCodeGen',
+                'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMTransformUtils',
+                'LLVMBitWriter', 'LLVMX86Desc',
+                'LLVMMCDisassembler', 'LLVMX86Info',
+                'LLVMX86AsmPrinter', 'LLVMX86Utils',
+                'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
+                'LLVMAnalysis', 'LLVMProfileData',
+                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
+                'LLVMBitReader', 'LLVMMC', 'LLVMCore',
+                'LLVMSupport',
+                'LLVMIRReader', 'LLVMAsmParser',
+                'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
+                'LLVMBinaryFormat',
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('4.0'):
            env.Prepend(LIBS = [
                'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
@@ -212,14 +231,14 @@ def generate(env):
    else:
        llvm_config = os.environ.get('LLVM_CONFIG', 'llvm-config')
        if not env.Detect(llvm_config):
-            print 'scons: %s script not found' % llvm_config
+            print('scons: %s script not found' % llvm_config)
            return

        llvm_version = env.backtick('%s --version' % llvm_config).rstrip()
        llvm_version = distutils.version.LooseVersion(llvm_version)

        if llvm_version < distutils.version.LooseVersion(required_llvm_version):
-            print 'scons: LLVM version %s found, but %s is required' % (llvm_version, required_llvm_version)
+            print('scons: LLVM version %s found, but %s is required' % (llvm_version, required_llvm_version))
            return

        try:
@@ -245,13 +264,13 @@ def generate(env):
                env.ParseConfig('%s --system-libs' % llvm_config)
                env.Append(CXXFLAGS = ['-std=c++11'])
        except OSError:
-            print 'scons: llvm-config version %s failed' % llvm_version
+            print('scons: llvm-config version %s failed' % llvm_version)
            return

    assert llvm_version is not None
    env['llvm'] = True

-    print 'scons: Found LLVM version %s' % llvm_version
+    print('scons: Found LLVM version %s' % llvm_version)
    env['LLVM_VERSION'] = llvm_version

    # Define HAVE_LLVM macro with the major/minor version number (e.g., 0x0206 for 2.6)
--- a/src/SConscript
+++ b/src/SConscript
@@ -28,7 +28,7 @@ def write_git_sha1_h_file(filename):
        try:
            subprocess.Popen(args, stdout=f).wait()
        except:
-            print "Warning: exception in write_git_sha1_h_file()"
+            print("Warning: exception in write_git_sha1_h_file()")
            return

    if not os.path.exists(filename) or not filecmp.cmp(tempfile, filename):
--- a/src/amd/Android.common.mk
+++ b/src/amd/Android.common.mk
@@ -54,9 +54,7 @@ LOCAL_C_INCLUDES := \
 	$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
 	$(MESA_TOP)/src/gallium/include \
 	$(MESA_TOP)/src/gallium/auxiliary \
-	$(intermediates)/common \
-	external/llvm/include \
-	external/llvm/device/include
+	$(intermediates)/common

 LOCAL_EXPORT_C_INCLUDE_DIRS := \
 	$(LOCAL_PATH)/common
--- a/src/amd/addrlib/core/addrobject.cpp
+++ b/src/amd/addrlib/core/addrobject.cpp
@@ -216,20 +216,16 @@ VOID Object::DebugPrint(
 #if DEBUG
    if (m_client.callbacks.debugPrint != NULL)
    {
-        va_list ap;
-
-        va_start(ap, pDebugString);
-
        ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};

        debugPrintInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
        debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
        debugPrintInput.hClient      = m_client.handle;
-        va_copy(debugPrintInput.ap, ap);
+        va_start(debugPrintInput.ap, pDebugString);

        m_client.callbacks.debugPrint(&debugPrintInput);

-        va_end(ap);
+        va_end(debugPrintInput.ap);
    }
 #endif
 }
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@@ -109,7 +109,7 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
 	}
 }

-void ac_elf_read(const char *elf_data, unsigned elf_size,
+bool ac_elf_read(const char *elf_data, unsigned elf_size,
 		 struct ac_shader_binary *binary)
 {
 	char *elf_buffer;
@@ -118,6 +118,7 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 	Elf_Data *symbols = NULL, *relocs = NULL;
 	size_t section_str_index;
 	unsigned symbol_sh_link = 0;
+	bool success = true;

 	/* One of the libelf implementations
 	 * (http://www.mr511.de/software/english.htm) requires calling
@@ -137,7 +138,8 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 		GElf_Shdr section_header;
 		if (gelf_getshdr(section, &section_header) != &section_header) {
 			fprintf(stderr, "Failed to read ELF section header\n");
-			return;
+			success = false;
+			break;
 		}
 		name = elf_strptr(elf, section_str_index, section_header.sh_name);
 		if (!strcmp(name, ".text")) {
@@ -148,6 +150,11 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 		} else if (!strcmp(name, ".AMDGPU.config")) {
 			section_data = elf_getdata(section, section_data);
 			binary->config_size = section_data->d_size;
+			if (!binary->config_size) {
+				fprintf(stderr, ".AMDGPU.config is empty!\n");
+				success = false;
+				break;
+			}
 			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
 			memcpy(binary->config, section_data->d_buf, binary->config_size);
 		} else if (!strcmp(name, ".AMDGPU.disasm")) {
@@ -186,6 +193,7 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 		binary->global_symbol_count = 1;
 		binary->config_size_per_symbol = binary->config_size;
 	}
+	return success;
 }

 const unsigned char *ac_shader_binary_config_start(
--- a/src/amd/common/ac_binary.h
+++ b/src/amd/common/ac_binary.h
@@ -83,7 +83,7 @@ struct ac_shader_config {
 * Parse the elf binary stored in \p elf_data and create a
 * ac_shader_binary object.
 */
-void ac_elf_read(const char *elf_data, unsigned elf_size,
+bool ac_elf_read(const char *elf_data, unsigned elf_size,
 		 struct ac_shader_binary *binary);

 /**
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -45,10 +45,13 @@
 * The caller is responsible for initializing ctx::module and ctx::builder.
 */
 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+		     enum chip_class chip_class)
 {
 	LLVMValueRef args[1];

+	ctx->chip_class = chip_class;
+
 	ctx->context = context;
 	ctx->module = NULL;
 	ctx->builder = NULL;
@@ -176,6 +179,20 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
 	}
 }

+/**
+ * Build an LLVM IR PHI node of \p type using ctx->builder and immediately
+ * add \p count_incoming incoming edges taken from \p values and \p blocks.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+	LLVMAddIncoming(phi, values, blocks, count_incoming);
+	return phi;
+}
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
@@ -290,15 +307,15 @@ static void build_cube_select(LLVMBuilderRef builder,
 	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

 	/* Select sc */
-	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+	tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
 	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
-		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+		LLVMBuildSelect(builder, is_ma_z, sgn_ma,
 			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
 	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

 	/* Select tc */
 	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
-	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+	sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
 		LLVMConstReal(f32, -1.0), "");
 	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

@@ -312,7 +329,7 @@ static void build_cube_select(LLVMBuilderRef builder,

 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
-		       bool is_deriv, bool is_array,
+		       bool is_deriv, bool is_array, bool is_lod,
 		       LLVMValueRef *coords_arg,
 		       LLVMValueRef *derivs_arg)
 {
@@ -322,6 +339,38 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 	LLVMValueRef coords[3];
 	LLVMValueRef invma;

+	if (is_array && !is_lod) {
+		LLVMValueRef tmp = coords_arg[3];
+		tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+
+		/* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+		 *
+		 *    "For Array forms, the array layer used will be
+		 *
+		 *       max(0, min(d−1, floor(layer+0.5)))
+		 *
+		 *     where d is the depth of the texture array and layer
+		 *     comes from the component indicated in the tables below.
+		 *     Workaround for an issue where the layer is taken from a
+		 *     helper invocation which happens to fall on a different
+		 *     layer due to extrapolation."
+		 *
+		 * VI and earlier attempt to implement this in hardware by
+		 * clamping the value of coords[2] = (8 * layer) + face.
+		 * Unfortunately, this means that we end up with the wrong
+		 * face when clamping occurs.
+		 *
+		 * Clamp the layer earlier to work around the issue.
+		 */
+		if (ctx->chip_class <= VI) {
+			LLVMValueRef ge0;
+			ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
+			tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
+		}
+
+		coords_arg[3] = tmp;
+	}
+
 	build_cube_intrinsic(ctx, coords_arg, &selcoords);

 	invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
@@ -795,21 +844,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	      bool has_ds_bpermute,
 	      uint32_t mask,
 	      int idx,
-	      LLVMValueRef lds,
 	      LLVMValueRef val)
 {
-	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+	LLVMValueRef tl, trbl, args[2];
 	LLVMValueRef result;

-	thread_id = ac_get_thread_id(ctx);
-
-	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
-			      LLVMConstInt(ctx->i32, mask, false), "");
-
-	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
-				LLVMConstInt(ctx->i32, idx, false), "");
-
 	if (has_ds_bpermute) {
+		LLVMValueRef thread_id, tl_tid, trbl_tid;
+		thread_id = ac_get_thread_id(ctx);
+
+		tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+				      LLVMConstInt(ctx->i32, mask, false), "");
+
+		trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+					LLVMConstInt(ctx->i32, idx, false), "");
+
 		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
 				       LLVMConstInt(ctx->i32, 4, false), "");
 		args[1] = val;
@@ -827,15 +876,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 					  AC_FUNC_ATTR_READNONE |
 					  AC_FUNC_ATTR_CONVERGENT);
 	} else {
-		LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+		uint32_t masks[2];

-		store_ptr = ac_build_gep0(ctx, lds, thread_id);
-		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
-		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+		switch (mask) {
+		case AC_TID_MASK_TOP_LEFT:
+			masks[0] = 0x8000;
+			if (idx == 1)
+				masks[1] = 0x8055;
+			else
+				masks[1] = 0x80aa;

-		LLVMBuildStore(ctx->builder, val, store_ptr);
-		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
-		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+			break;
+		case AC_TID_MASK_TOP:
+			masks[0] = 0x8044;
+			masks[1] = 0x80ee;
+			break;
+		case AC_TID_MASK_LEFT:
+			masks[0] = 0x80a0;
+			masks[1] = 0x80f5;
+			break;
+		}
+
+		args[0] = val;
+		args[1] = LLVMConstInt(ctx->i32, masks[0], false);
+
+		tl = ac_build_intrinsic(ctx,
+					"llvm.amdgcn.ds.swizzle", ctx->i32,
+					args, 2,
+					AC_FUNC_ATTR_READNONE |
+					AC_FUNC_ATTR_CONVERGENT);
+
+		args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+		trbl = ac_build_intrinsic(ctx,
+					"llvm.amdgcn.ds.swizzle", ctx->i32,
+					args, 2,
+					AC_FUNC_ATTR_READNONE |
+					AC_FUNC_ATTR_CONVERGENT);
 	}

 	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -28,6 +28,8 @@
 #include <stdbool.h>
 #include <llvm-c/TargetMachine.h>

+#include "amd_family.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -61,10 +63,13 @@ struct ac_llvm_context {
 	unsigned fpmath_md_kind;
 	LLVMValueRef fpmath_md_2p5_ulp;
 	LLVMValueRef empty_md;
+
+	enum chip_class chip_class;
 };

 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context);
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+		     enum chip_class chip_class);

 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
@@ -73,6 +78,11 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,

 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);

+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks);
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
@@ -91,7 +101,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,

 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
-		       bool is_deriv, bool is_array,
+		       bool is_deriv, bool is_array, bool is_lod,
 		       LLVMValueRef *coords_arg,
 		       LLVMValueRef *derivs_arg);

@@ -173,7 +183,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	      bool has_ds_bpermute,
 	      uint32_t mask,
 	      int idx,
-	      LLVMValueRef lds,
 	      LLVMValueRef val);

 #define AC_SENDMSG_GS 2
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1178,7 +1178,17 @@ static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
 		 */
 		LLVMConstInt(ctx->i1, 1, false),
 	};
-	return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
+
+	LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+					      params, 2,
+					      AC_FUNC_ATTR_READNONE);
+
+	/* TODO: We need an intrinsic to skip this conditional. */
+	/* Check for zero: */
+	return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
+							   LLVMIntEQ, src0,
+							   ctx->i32_0, ""),
+			       LLVMConstInt(ctx->i32, -1, 0), lsb, "");
 }

 static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
@@ -1305,7 +1315,6 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
 	src0 = to_float(&ctx->ac, src0);
 	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

-	/* TODO SI/CIK options here */
 	if (ctx->options->chip_class >= VI) {
 		LLVMValueRef args[2];
 		/* Check if the result is a denormal - and flush to 0 if so. */
@@ -1319,7 +1328,22 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,

 	if (ctx->options->chip_class >= VI)
 		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
-
+	else {
+		/* for SI/CIK */
+		/* 0x38800000 is the smallest normal half float value (2^-14) in 32-bit float,
+		 * so compare the result and flush to 0 if it's smaller.
+		 */
+		LLVMValueRef temp, cond2;
+		temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
+					    ctx->f32, result);
+		cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
+				     LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
+				     temp, "");
+		cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
+				      temp, ctx->f32zero, "");
+		cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
+		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
+	}
 	return result;
 }

@@ -1443,11 +1467,6 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
 	int idx;
 	LLVMValueRef result;

-	if (!ctx->lds && !ctx->has_ds_bpermute)
-		ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
-						       LLVMArrayType(ctx->i32, 64),
-						       "ddxy_lds", LOCAL_ADDR_SPACE);
-
 	if (op == nir_op_fddx_fine || op == nir_op_fddx)
 		mask = AC_TID_MASK_LEFT;
 	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
@@ -1464,7 +1483,7 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
 		idx = 2;

 	result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
-			      mask, idx, ctx->lds,
+			      mask, idx,
 			      src0);
 	return result;
 }
@@ -1742,7 +1761,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
 						      result);
 		break;
 	case nir_op_ffma:
-		result = emit_intrin_3f_param(&ctx->ac, "llvm.fma",
+		result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
 		                              to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
 		break;
 	case nir_op_ibitfield_extract:
@@ -1779,10 +1798,12 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
 		break;
 	case nir_op_i2f32:
 	case nir_op_i2f64:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
 		break;
 	case nir_op_u2f32:
 	case nir_op_u2f64:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
 		break;
 	case nir_op_f2f64:
@@ -1793,6 +1814,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
 		break;
 	case nir_op_u2u32:
 	case nir_op_u2u64:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
 			result = LLVMBuildZExt(ctx->builder, src[0], def_type, "");
 		else
@@ -1800,6 +1822,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
 		break;
 	case nir_op_i2i32:
 	case nir_op_i2i64:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
 			result = LLVMBuildSExt(ctx->builder, src[0], def_type, "");
 		else
@@ -1809,18 +1832,25 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
 		result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
 		break;
 	case nir_op_find_lsb:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		result = emit_find_lsb(&ctx->ac, src[0]);
 		break;
 	case nir_op_ufind_msb:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		result = emit_ufind_msb(&ctx->ac, src[0]);
 		break;
 	case nir_op_ifind_msb:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		result = emit_ifind_msb(&ctx->ac, src[0]);
 		break;
 	case nir_op_uadd_carry:
+		src[0] = to_integer(&ctx->ac, src[0]);
+		src[1] = to_integer(&ctx->ac, src[1]);
 		result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
 		break;
 	case nir_op_usub_borrow:
+		src[0] = to_integer(&ctx->ac, src[0]);
+		src[1] = to_integer(&ctx->ac, src[1]);
 		result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
 		break;
 	case nir_op_b2f:
@@ -1833,15 +1863,20 @@ static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *inst
 		result = emit_b2i(&ctx->ac, src[0]);
 		break;
 	case nir_op_i2b:
+		src[0] = to_integer(&ctx->ac, src[0]);
 		result = emit_i2b(&ctx->ac, src[0]);
 		break;
 	case nir_op_fquantize2f16:
 		result = emit_f2f16(ctx, src[0]);
 		break;
 	case nir_op_umul_high:
+		src[0] = to_integer(&ctx->ac, src[0]);
+		src[1] = to_integer(&ctx->ac, src[1]);
 		result = emit_umul_high(&ctx->ac, src[0], src[1]);
 		break;
 	case nir_op_imul_high:
+		src[0] = to_integer(&ctx->ac, src[0]);
+		src[1] = to_integer(&ctx->ac, src[1]);
 		result = emit_imul_high(&ctx->ac, src[0], src[1]);
 		break;
 	case nir_op_pack_half_2x16:
@@ -2158,7 +2193,7 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
 		break;
 	}

-	if (instr->op == nir_texop_tg4) {
+	if (instr->op == nir_texop_tg4 && ctx->options->chip_class <= VI) {
 		enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
 		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
 			return radv_lower_gather4_integer(ctx, args, instr);
@@ -3274,13 +3309,13 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,

 	int count;
 	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
+	bool is_array = glsl_sampler_type_is_array(type);
 	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
 			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
 	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
 		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
-
-	count = image_type_to_components_count(dim,
-					       glsl_sampler_type_is_array(type));
+	bool gfx9_1d = ctx->options->chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
+	count = image_type_to_components_count(dim, is_array);

 	if (is_ms) {
 		LLVMValueRef fmask_load_address[3];
@@ -3288,7 +3323,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,

 		fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
 		fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
-		if (glsl_sampler_type_is_array(type))
+		if (is_array)
 			fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
 		else
 			fmask_load_address[2] = NULL;
@@ -3303,7 +3338,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
 							       sample_index,
 							       get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
 	}
-	if (count == 1) {
+	if (count == 1 && !gfx9_1d) {
 		if (instr->src[0].ssa->num_components)
 			res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
 		else
@@ -3313,13 +3348,22 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
 		if (is_ms)
 			count--;
 		for (chan = 0; chan < count; ++chan) {
-			coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
+			coords[chan] = llvm_extract_elem(ctx, src0, chan);
 		}
-
 		if (add_frag_pos) {
 			for (chan = 0; chan < count; ++chan)
 				coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
 		}
+
+		if (gfx9_1d) {
+			if (is_array) {
+				coords[2] = coords[1];
+				coords[1] = ctx->ac.i32_0;
+			} else
+				coords[1] = ctx->ac.i32_0;
+			count++;
+		}
+
 		if (is_ms) {
 			coords[count] = sample_index;
 			count++;
@@ -3400,7 +3444,10 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 	char intrinsic_name[64];
 	const nir_variable *var = instr->variables[0]->var;
 	const struct glsl_type *type = glsl_without_array(var->type);
-
+	LLVMValueRef glc = ctx->i1false;
+	bool force_glc = ctx->options->chip_class == SI;
+	if (force_glc)
+		glc = ctx->i1true;
 	if (ctx->stage == MESA_SHADER_FRAGMENT)
 		ctx->shader_info->fs.writes_memory = true;

@@ -3410,7 +3457,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 		params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
 						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
 		params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
-		params[4] = ctx->i1false;  /* glc */
+		params[4] = glc;  /* glc */
 		params[5] = ctx->i1false;  /* slc */
 		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
 				   params, 6, 0);
@@ -3418,7 +3465,6 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 		bool is_da = glsl_sampler_type_is_array(type) ||
 			     glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
 		LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
-		LLVMValueRef glc = ctx->i1false;
 		LLVMValueRef slc = ctx->i1false;

 		params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2]));
@@ -3453,7 +3499,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
                                       const nir_intrinsic_instr *instr)
 {
-	LLVMValueRef params[6];
+	LLVMValueRef params[7];
 	int param_count = 0;
 	const nir_variable *var = instr->variables[0]->var;

@@ -3465,15 +3511,17 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
 	if (ctx->stage == MESA_SHADER_FRAGMENT)
 		ctx->shader_info->fs.writes_memory = true;

+	bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
+
 	switch (instr->intrinsic) {
 	case nir_intrinsic_image_atomic_add:
 		atomic_name = "add";
 		break;
 	case nir_intrinsic_image_atomic_min:
-		atomic_name = "smin";
+		atomic_name = is_unsigned ? "umin" : "smin";
 		break;
 	case nir_intrinsic_image_atomic_max:
-		atomic_name = "smax";
+		atomic_name = is_unsigned ? "umax" : "smax";
 		break;
 	case nir_intrinsic_image_atomic_and:
 		atomic_name = "and";
@@ -3554,14 +3602,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,

 	res = ac_build_image_opcode(&ctx->ac, &args);

+	LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
+
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
 	    glsl_sampler_type_is_array(type)) {
-		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
 		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
 	}
+	if (ctx->options->chip_class >= GFX9 &&
+	    glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
+	    glsl_sampler_type_is_array(type)) {
+		LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, res, two, "");
+		res = LLVMBuildInsertElement(ctx->builder, res, layers,
+						ctx->ac.i32_1, "");
+
+	}
 	return res;
 }

@@ -4418,36 +4475,50 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)

 	/* pack derivatives */
 	if (ddx || ddy) {
+		int num_src_deriv_channels, num_dest_deriv_channels;
 		switch (instr->sampler_dim) {
 		case GLSL_SAMPLER_DIM_3D:
 		case GLSL_SAMPLER_DIM_CUBE:
 			num_deriv_comp = 3;
+			num_src_deriv_channels = 3;
+			num_dest_deriv_channels = 3;
 			break;
 		case GLSL_SAMPLER_DIM_2D:
 		default:
+			num_src_deriv_channels = 2;
+			num_dest_deriv_channels = 2;
 			num_deriv_comp = 2;
 			break;
 		case GLSL_SAMPLER_DIM_1D:
-			num_deriv_comp = 1;
+			num_src_deriv_channels = 1;
+			if (ctx->options->chip_class >= GFX9) {
+				num_dest_deriv_channels = 2;
+				num_deriv_comp = 2;
+			} else {
+				num_dest_deriv_channels = 1;
+				num_deriv_comp = 1;
+			}
 			break;
 		}

-		for (unsigned i = 0; i < num_deriv_comp; i++) {
+		for (unsigned i = 0; i < num_src_deriv_channels; i++) {
 			derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i));
-			derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
+			derivs[num_dest_deriv_channels + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
+		}
+		for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
+			derivs[i] = ctx->ac.f32_0;
+			derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
 		}
 	}

 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
-		if (instr->is_array && instr->op != nir_texop_lod)
-			coords[3] = apply_round_slice(ctx, coords[3]);
 		for (chan = 0; chan < instr->coord_components; chan++)
 			coords[chan] = to_float(&ctx->ac, coords[chan]);
 		if (instr->coord_components == 3)
 			coords[3] = LLVMGetUndef(ctx->f32);
 		ac_prepare_cube_coords(&ctx->ac,
 			instr->op == nir_texop_txd, instr->is_array,
-			coords, derivs);
+			instr->op == nir_texop_lod, coords, derivs);
 		if (num_deriv_comp)
 			num_deriv_comp--;
 	}
@@ -4475,6 +4546,25 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 			}
 			address[count++] = coords[2];
 		}
+
+		if (ctx->options->chip_class >= GFX9) {
+			LLVMValueRef filler;
+			if (instr->op == nir_texop_txf)
+				filler = ctx->ac.i32_0;
+			else
+				filler = LLVMConstReal(ctx->f32, 0.5);
+
+			if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
+				/* No nir_texop_lod, because it does not take a slice
+				 * even with array textures. */
+				if (instr->is_array && instr->op != nir_texop_lod ) {
+					address[count] = address[count - 1];
+					address[count - 1] = filler;
+					count++;
+				} else
+					address[count++] = filler;
+			}
+		}
 	}

 	/* Pack LOD */
@@ -4569,6 +4659,14 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
+	} else if (ctx->options->chip_class >= GFX9 &&
+		   instr->op == nir_texop_txs &&
+		   instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+		   instr->is_array) {
+		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
+		LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, result, two, "");
+		result = LLVMBuildInsertElement(ctx->builder, result, layers,
+						ctx->ac.i32_1, "");
 	} else if (instr->dest.ssa.num_components != 4)
 		result = trim_vector(ctx, result, instr->dest.ssa.num_components);

@@ -5178,6 +5276,7 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 		unsigned index = target - V_008DFC_SQ_EXP_MRT;
 		unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
 		bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
+		bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;

 		switch(col_format) {
 		case V_028714_SPI_SHADER_ZERO:
@@ -5255,11 +5354,13 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 			break;

 		case V_028714_SPI_SHADER_UINT16_ABGR: {
-			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
+			LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+							    is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
+			LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);

 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = to_integer(&ctx->ac, values[chan]);
-				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], max);
+				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
 			}

 			args->compr = 1;
@@ -5269,14 +5370,18 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 		}

 		case V_028714_SPI_SHADER_SINT16_ABGR: {
-			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
-			LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
+			LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+							    is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
+			LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+							    is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
+			LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->i32one;
+			LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);

 			/* Clamp. */
 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = to_integer(&ctx->ac, values[chan]);
-				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], max);
-				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], min);
+				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
+				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
 			}

 			args->compr = 1;
@@ -5367,11 +5472,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
 		                                     ctx->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
 	}

-	uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
-			 (outinfo->writes_layer == true ? 4 : 0) |
-			 (outinfo->writes_viewport_index == true ? 8 : 0));
-	if (mask) {
-		pos_args[1].enabled_channels = mask;
+	if (outinfo->writes_pointsize ||
+	    outinfo->writes_layer ||
+	    outinfo->writes_viewport_index) {
+		pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
+						(outinfo->writes_layer == true ? 4 : 0));
 		pos_args[1].valid_mask = 0;
 		pos_args[1].done = 0;
 		pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
@@ -5385,8 +5490,26 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
 			pos_args[1].out[0] = psize_value;
 		if (outinfo->writes_layer == true)
 			pos_args[1].out[2] = layer_value;
-		if (outinfo->writes_viewport_index == true)
-			pos_args[1].out[3] = viewport_index_value;
+		if (outinfo->writes_viewport_index == true) {
+			if (ctx->options->chip_class >= GFX9) {
+				/* GFX9 has the layer in out.z[10:0] and the viewport
+				 * index in out.z[19:16].
+				 */
+				LLVMValueRef v = viewport_index_value;
+				v = to_integer(&ctx->ac, v);
+				v = LLVMBuildShl(ctx->builder, v,
+						 LLVMConstInt(ctx->i32, 16, false),
+						 "");
+				v = LLVMBuildOr(ctx->builder, v,
+						to_integer(&ctx->ac, pos_args[1].out[2]), "");
+
+				pos_args[1].out[2] = to_float(&ctx->ac, v);
+				pos_args[1].enabled_channels |= 1 << 2;
+			} else {
+				pos_args[1].out[3] = viewport_index_value;
+				pos_args[1].enabled_channels |= 1 << 3;
+			}
+		}
 	}
 	for (i = 0; i < 4; i++) {
 		if (pos_args[i].out[0])
@@ -5815,10 +5938,11 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
 		args.enabled_channels |= 0x4;
 	}

-	/* SI (except OLAND) has a bug that it only looks
+	/* SI (except OLAND and HAINAN) has a bug that it only looks
 	 * at the X writemask component. */
 	if (ctx->options->chip_class == SI &&
-	    ctx->options->family != CHIP_OLAND)
+	    ctx->options->family != CHIP_OLAND &&
+	    ctx->options->family != CHIP_HAINAN)
 		args.enabled_channels |= 0x1;

 	ac_build_export(&ctx->ac, &args);
@@ -6041,7 +6165,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	ctx.context = LLVMContextCreate();
 	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);

-	ac_llvm_context_init(&ctx.ac, ctx.context);
+	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
 	ctx.ac.module = ctx.module;

 	ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
@@ -6375,7 +6499,7 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
 	ctx.options = options;
 	ctx.shader_info = shader_info;

-	ac_llvm_context_init(&ctx.ac, ctx.context);
+	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
 	ctx.ac.module = ctx.module;

 	ctx.is_gs_copy_shader = true;
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -57,6 +57,7 @@ struct ac_tcs_variant_key {
 struct ac_fs_variant_key {
 	uint32_t col_format;
 	uint32_t is_int8;
+	uint32_t is_int10;
 };

 union ac_shader_variant_key {
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -257,6 +257,18 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
 	AddrSurfInfoIn->width = u_minify(config->info.width, level);
 	AddrSurfInfoIn->height = u_minify(config->info.height, level);

+	/* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
+	 * because GFX9 needs linear alignment of 256 bytes.
+	 */
+	if (config->info.levels == 1 &&
+	    AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
+	    AddrSurfInfoIn->bpp) {
+		unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
+
+		assert(util_is_power_of_two(AddrSurfInfoIn->bpp));
+		AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
+	}
+
 	if (config->is_3d)
 		AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
 	else if (config->is_cube)
@@ -541,15 +553,35 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 	AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
 	AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;

-	/* noStencil = 0 can result in a depth part that is incompatible with
-	 * mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in
-	 * this case, we may end up setting stencil_adjusted).
+	/* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
+	 * for Z and stencil. This can cause a number of problems which we work
+	 * around here:
 	 *
-	 * TODO: update addrlib to a newer version, remove this, and
-	 * use flags.matchStencilTileCfg = 1 as an alternative fix.
+	 * - a depth part that is incompatible with mipmapped texturing
+	 * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
+	 *   incorrect tiling applied to the stencil part, stencil buffer
+	 *   memory accesses that go out of bounds) even without mipmapping
+	 *
+	 * Some piglit tests that are prone to different types of related
+	 * failures:
+	 *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
+	 *  ./bin/framebuffer-blit-levels {draw,read} stencil
+	 *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
+	 *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
+	 *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
 	 */
-	if (config->info.levels > 1)
+	int stencil_tile_idx = -1;
+
+	if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
+	    (config->info.levels > 1 || info->family == CHIP_STONEY)) {
+		/* Compute stencilTileIdx that is compatible with the (depth)
+		 * tileIdx. This degrades the depth surface if necessary to
+		 * ensure that a matching stencilTileIdx exists. */
+		AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
+
+		/* Keep the depth mip-tail compatible with texturing. */
 		AddrSurfInfoIn.flags.noStencil = 1;
+	}

 	/* Set preferred macrotile parameters. This is usually required
 	 * for shared resources. This is for 2D tiling only. */
@@ -631,12 +663,33 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 			if (level > 0)
 				continue;

+			/* Check that we actually got a TC-compatible HTILE if
+			 * we requested it (only for level 0, since we're not
+			 * supporting HTILE on higher mip levels anyway). */
+			assert(AddrSurfInfoOut.tcCompatible ||
+			       !AddrSurfInfoIn.flags.tcCompatible ||
+			       AddrSurfInfoIn.flags.matchStencilTileCfg);
+
+			if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
+				if (!AddrSurfInfoOut.tcCompatible) {
+					AddrSurfInfoIn.flags.tcCompatible = 0;
+					surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
+				}
+
+				AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
+				AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
+				stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
+
+				assert(stencil_tile_idx >= 0);
+			}
+
 			gfx6_surface_settings(info, &AddrSurfInfoOut, surf);
 		}
 	}

 	/* Calculate texture layout information for stencil. */
 	if (surf->flags & RADEON_SURF_SBUFFER) {
+		AddrSurfInfoIn.tileIndex = stencil_tile_idx;
 		AddrSurfInfoIn.bpp = 8;
 		AddrSurfInfoIn.flags.depth = 0;
 		AddrSurfInfoIn.flags.stencil = 1;
@@ -835,9 +888,11 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 		    in->numSamples == 1) {
 			ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
 			ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+			ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};

 			din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
 			dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
+			dout.pMipInfo = meta_mip_info;

 			din.dccKeyFlags.pipeAligned = 1;
 			din.dccKeyFlags.rbAligned = 1;
@@ -861,6 +916,39 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 			surf->u.gfx9.dcc_pitch_max = dout.pitch - 1;
 			surf->dcc_size = dout.dccRamSize;
 			surf->dcc_alignment = dout.dccRamBaseAlign;
+			surf->num_dcc_levels = in->numMipLevels;
+
+			/* Disable DCC for levels that are in the mip tail.
+			 *
+			 * There are two issues that this is intended to
+			 * address:
+			 *
+			 * 1. Multiple mip levels may share a cache line. This
+			 *    can lead to corruption when switching between
+			 *    rendering to different mip levels because the
+			 *    RBs don't maintain coherency.
+			 *
+			 * 2. Texturing with metadata after rendering sometimes
+			 *    fails with corruption, probably for a similar
+			 *    reason.
+			 *
+			 * Working around these issues for all levels in the
+			 * mip tail may be overly conservative, but it's what
+			 * Vulkan does.
+			 *
+			 * Alternative solutions that also work but are worse:
+			 * - Disable DCC entirely.
+			 * - Flush TC L2 after rendering.
+			 */
+			for (unsigned i = 0; i < in->numMipLevels; i++) {
+				if (meta_mip_info[i].inMiptail) {
+					surf->num_dcc_levels = i;
+					break;
+				}
+			}
+
+			if (!surf->num_dcc_levels)
+				surf->dcc_size = 0;
 		}

 		/* FMASK */
@@ -997,6 +1085,11 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,

 	case RADEON_SURF_MODE_1D:
 	case RADEON_SURF_MODE_2D:
+		if (surf->flags & RADEON_SURF_IMPORTED) {
+			AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode;
+			break;
+		}
+
 		r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, false,
 						    &AddrSurfInfoIn.swizzleMode);
 		if (r)
@@ -1009,6 +1102,7 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,

 	surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;

+	surf->num_dcc_levels = 0;
 	surf->surf_size = 0;
 	surf->dcc_size = 0;
 	surf->htile_size = 0;
@@ -1025,9 +1119,16 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,

 	/* Calculate texture layout information for stencil. */
 	if (surf->flags & RADEON_SURF_SBUFFER) {
-		AddrSurfInfoIn.bpp = 8;
-		AddrSurfInfoIn.flags.depth = 0;
 		AddrSurfInfoIn.flags.stencil = 1;
+		AddrSurfInfoIn.bpp = 8;
+
+		if (!AddrSurfInfoIn.flags.depth) {
+			r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, false,
+							    &AddrSurfInfoIn.swizzleMode);
+			if (r)
+				return r;
+		} else
+			AddrSurfInfoIn.flags.depth = 0;

 		r = gfx9_compute_miptree(addrlib, surf, compressed, &AddrSurfInfoIn);
 		if (r)
@@ -1035,7 +1136,6 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
 	}

 	surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;
-	surf->num_dcc_levels = surf->dcc_size ? config->info.levels : 0;

 	switch (surf->u.gfx9.surf.swizzle_mode) {
 		/* S = standard. */
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -2453,6 +2453,8 @@
 #define   S_008F3C_BORDER_COLOR_PTR(x)                                (((unsigned)(x) & 0xFFF) << 0)
 #define   G_008F3C_BORDER_COLOR_PTR(x)                                (((x) >> 0) & 0xFFF)
 #define   C_008F3C_BORDER_COLOR_PTR                                   0xFFFFF000
+/* The UPGRADED_DEPTH field is driver-specific and does not exist in hardware. */
+#define   S_008F3C_UPGRADED_DEPTH(x)                                  (((unsigned)(x) & 0x1) << 29)
 #define   S_008F3C_BORDER_COLOR_TYPE(x)                               (((unsigned)(x) & 0x03) << 30)
 #define   G_008F3C_BORDER_COLOR_TYPE(x)                               (((x) >> 30) & 0x03)
 #define   C_008F3C_BORDER_COLOR_TYPE                                  0x3FFFFFFF
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -368,6 +368,10 @@ radv_emit_graphics_blend_state(struct radv_cmd_buffer *cmd_buffer,
 	radeon_set_context_reg(cmd_buffer->cs, R_028B70_DB_ALPHA_TO_MASK, pipeline->graphics.blend.db_alpha_to_mask);

 	if (cmd_buffer->device->physical_device->has_rbplus) {
+
+		radeon_set_context_reg_seq(cmd_buffer->cs, R_028760_SX_MRT0_BLEND_OPT, 8);
+		radeon_emit_array(cmd_buffer->cs, pipeline->graphics.blend.sx_mrt_blend_opt, 8);
+
 		radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
 		radeon_emit(cmd_buffer->cs, 0);	/* R_028754_SX_PS_DOWNCONVERT */
 		radeon_emit(cmd_buffer->cs, 0);	/* R_028758_SX_BLEND_OPT_EPSILON */
@@ -934,6 +938,11 @@ static void
 radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
 {
 	uint32_t count = cmd_buffer->state.dynamic.scissor.count;
+
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+		si_emit_cache_flush(cmd_buffer);
+	}
 	si_write_scissors(cmd_buffer->cs, 0, count,
 			  cmd_buffer->state.dynamic.scissor.scissors,
 			  cmd_buffer->state.dynamic.viewport.viewports,
@@ -1007,6 +1016,8 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
 	}

 	radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
+	radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
+

 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
 		radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
@@ -1043,7 +1054,6 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
 		radeon_emit(cmd_buffer->cs, ds->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
 		radeon_emit(cmd_buffer->cs, ds->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */

-		radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
 	}

 	radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
@@ -1208,6 +1218,10 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 	struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;

+	/* this may happen for inherited secondary recording */
+	if (!framebuffer)
+		return;
+
 	for (i = 0; i < 8; ++i) {
 		if (i >= subpass->color_count || subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
 			radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
@@ -1247,9 +1261,13 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 		}
 		radv_load_depth_clear_regs(cmd_buffer, image);
 	} else {
-		radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
-		radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* R_028040_DB_Z_INFO */
-		radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* R_028044_DB_STENCIL_INFO */
+		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+			radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
+		else
+			radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
+
+		radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
+		radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
 	}
 	radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
 			       S_028208_BR_X(framebuffer->width) |
@@ -1971,6 +1989,7 @@ VkResult radv_BeginCommandBuffer(

 	memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
 	cmd_buffer->state.last_primitive_reset_en = -1;
+	cmd_buffer->usage_flags = pBeginInfo->flags;

 	/* setup initial configuration into command buffer */
 	if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
@@ -2016,7 +2035,7 @@ void radv_CmdBindVertexBuffers(
 	/* We have to defer setting up vertex buffer since we need the buffer
 	 * stride from the pipeline. */

-	assert(firstBinding + bindingCount < MAX_VBS);
+	assert(firstBinding + bindingCount <= MAX_VBS);
 	for (uint32_t i = 0; i < bindingCount; i++) {
 		vb[firstBinding + i].buffer = radv_buffer_from_handle(pBuffers[i]);
 		vb[firstBinding + i].offset = pOffsets[i];
@@ -2233,8 +2252,13 @@ VkResult radv_EndCommandBuffer(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

-	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
+	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
+		if (cmd_buffer->device->physical_device->rad_info.chip_class == SI)
+			cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
 		si_emit_cache_flush(cmd_buffer);
+	}
+
+	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);

 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
 	    cmd_buffer->record_fail)
@@ -2701,7 +2725,7 @@ void radv_CmdDrawIndexed(

 	radv_cmd_buffer_flush_state(cmd_buffer, true, (instanceCount > 1), false, indexCount);

-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 16);

 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
 		radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_03090C_VGT_INDEX_TYPE,
@@ -2757,6 +2781,8 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
 	if (count_buffer) {
 		count_va = cmd_buffer->device->ws->buffer_get_va(count_buffer->bo);
 		count_va += count_offset + count_buffer->offset;
+
+		cmd_buffer->device->ws->cs_add_buffer(cs, count_buffer->bo, 8);
 	}

 	if (!draw_count)
@@ -2772,20 +2798,30 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cs, indirect_va);
 	radeon_emit(cs, indirect_va >> 32);

-	radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
-				       PKT3_DRAW_INDIRECT_MULTI,
-			     8, false));
-	radeon_emit(cs, 0);
-	radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) |
-	                S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
-	                S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
-	radeon_emit(cs, draw_count); /* count */
-	radeon_emit(cs, count_va); /* count_addr */
-	radeon_emit(cs, count_va >> 32);
-	radeon_emit(cs, stride); /* stride */
-	radeon_emit(cs, di_src_sel);
+	if (draw_count == 1 && !count_va && !draw_id_enable) {
+		radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT :
+				     PKT3_DRAW_INDIRECT, 3, false));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, di_src_sel);
+	} else {
+		radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
+				     PKT3_DRAW_INDIRECT_MULTI,
+				     8, false));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) |
+			    S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
+			    S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+		radeon_emit(cs, draw_count); /* count */
+		radeon_emit(cs, count_va); /* count_addr */
+		radeon_emit(cs, count_va >> 32);
+		radeon_emit(cs, stride); /* stride */
+		radeon_emit(cs, di_src_sel);
+	}
+
 	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -66,6 +66,7 @@ VkResult radv_CreateDescriptorSetLayout(

 	set_layout->binding_count = max_binding + 1;
 	set_layout->shader_stages = 0;
+	set_layout->dynamic_shader_stages = 0;
 	set_layout->size = 0;

 	memset(set_layout->binding, 0, size - sizeof(struct radv_descriptor_set_layout));
@@ -734,8 +735,59 @@ void radv_update_descriptor_sets(
 		}

 	}
-	if (descriptorCopyCount)
-		radv_finishme("copy descriptors");
+
+	for (i = 0; i < descriptorCopyCount; i++) {
+		const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
+		RADV_FROM_HANDLE(radv_descriptor_set, src_set,
+		                 copyset->srcSet);
+		RADV_FROM_HANDLE(radv_descriptor_set, dst_set,
+		                 copyset->dstSet);
+		const struct radv_descriptor_set_binding_layout *src_binding_layout =
+			src_set->layout->binding + copyset->srcBinding;
+		const struct radv_descriptor_set_binding_layout *dst_binding_layout =
+			dst_set->layout->binding + copyset->dstBinding;
+		uint32_t *src_ptr = src_set->mapped_ptr;
+		uint32_t *dst_ptr = dst_set->mapped_ptr;
+		struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+		struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
+
+		src_ptr += src_binding_layout->offset / 4;
+		dst_ptr += dst_binding_layout->offset / 4;
+
+		src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
+		dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
+
+		src_buffer_list += src_binding_layout->buffer_offset;
+		src_buffer_list += copyset->srcArrayElement;
+
+		dst_buffer_list += dst_binding_layout->buffer_offset;
+		dst_buffer_list += copyset->dstArrayElement;
+
+		for (j = 0; j < copyset->descriptorCount; ++j) {
+			switch (src_binding_layout->type) {
+			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+				unsigned src_idx = copyset->srcArrayElement + j;
+				unsigned dst_idx = copyset->dstArrayElement + j;
+				struct radv_descriptor_range *src_range, *dst_range;
+				src_idx += src_binding_layout->dynamic_offset_offset;
+				dst_idx += dst_binding_layout->dynamic_offset_offset;
+
+				src_range = src_set->dynamic_descriptors + src_idx;
+				dst_range = dst_set->dynamic_descriptors + dst_idx;
+				*dst_range = *src_range;
+				break;
+			}
+			default:
+				memcpy(dst_ptr, src_ptr, src_binding_layout->size);
+			}
+			src_ptr += src_binding_layout->size / 4;
+			dst_ptr += dst_binding_layout->size / 4;
+			/* Both list pointers already sit on element j, so copy through
+			 * them; indexing by j as well would skip every other entry. */
+			*dst_buffer_list++ = *src_buffer_list++;
+		}
+	}
 }

 void radv_UpdateDescriptorSets(
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -235,32 +235,112 @@ is_extension_enabled(const VkExtensionProperties *extensions,
 	return false;
 }

-static const char *
-get_chip_name(enum radeon_family family)
+static void
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
 {
+	const char *chip_string;
+	char llvm_string[32] = {};
+	/* Writes "<chip name>[ (LLVM x.y.z)]" into name, truncated to name_len. */
 	switch (family) {
-	case CHIP_TAHITI: return "AMD RADV TAHITI";
-	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
-	case CHIP_VERDE: return "AMD RADV CAPE VERDE";
-	case CHIP_OLAND: return "AMD RADV OLAND";
-	case CHIP_HAINAN: return "AMD RADV HAINAN";
-	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
-	case CHIP_KAVERI: return "AMD RADV KAVERI";
-	case CHIP_KABINI: return "AMD RADV KABINI";
-	case CHIP_HAWAII: return "AMD RADV HAWAII";
-	case CHIP_MULLINS: return "AMD RADV MULLINS";
-	case CHIP_TONGA: return "AMD RADV TONGA";
-	case CHIP_ICELAND: return "AMD RADV ICELAND";
-	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
-	case CHIP_FIJI: return "AMD RADV FIJI";
-	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
-	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
-	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
-	case CHIP_STONEY: return "AMD RADV STONEY";
-	case CHIP_VEGA10: return "AMD RADV VEGA";
-	case CHIP_RAVEN: return "AMD RADV RAVEN";
-	default: return "AMD RADV unknown";
+	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
+	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
+	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
+	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
+	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
+	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
+	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
+	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
+	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
+	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
+	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
+	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
+	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
+	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
+	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
+	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
+	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
+	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
+	case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
+	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
+	default: chip_string = "AMD RADV unknown"; break;
 	}
+	/* Build the " (LLVM x.y.z)" suffix from HAVE_LLVM; it stays empty otherwise. */
+	if (HAVE_LLVM > 0) {
+		snprintf(llvm_string, sizeof(llvm_string),
+			 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
+			 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+	}
+
+	snprintf(name, name_len, "%s%s", chip_string, llvm_string);
+
+static void
+radv_physical_device_init_mem_types(struct radv_physical_device *device)
+{
+	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
+	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
+	                                  device->rad_info.vram_vis_size);
+	/* Heaps: add only those with a non-zero size; an index of -1 means absent. */
+	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+	device->memory_properties.memoryHeapCount = 0;
+	if (device->rad_info.vram_size - visible_vram_size > 0) {
+		vram_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
+			.size = device->rad_info.vram_size - visible_vram_size,
+			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+		};
+	}
+	if (visible_vram_size) {
+		visible_vram_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
+			.size = visible_vram_size,
+			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+		};
+	}
+	if (device->rad_info.gart_size > 0) {
+		gart_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+			.size = device->rad_info.gart_size,
+			.flags = 0,
+		};
+	}
+	/* Types: exposed only for present heaps; mem_type_indices[] maps back to the enum. */
+	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
+	unsigned type_count = 0;
+	if (vram_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+			.heapIndex = vram_index,
+		};
+	}
+	if (gart_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = gart_index,
+		};
+	}
+	if (visible_vram_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = visible_vram_index,
+		};
+	}
+	if (gart_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+			.heapIndex = gart_index,
+		};
+	}
+	device->memory_properties.memoryTypeCount = type_count;
 }

 static VkResult
@@ -311,6 +391,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 		goto fail;
 	}

+	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
+
 	if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) {
 		radv_finish_wsi(device);
 		device->ws->destroy(device->ws);
@@ -336,7 +418,6 @@ radv_physical_device_init(struct radv_physical_device *device,
 	}

 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
-	device->name = get_chip_name(device->rad_info.family);

 	radv_get_device_uuid(drm_device, device->device_uuid);

@@ -346,6 +427,7 @@ radv_physical_device_init(struct radv_physical_device *device,
 		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
 	}

+	radv_physical_device_init_mem_types(device);
 	return VK_SUCCESS;

 fail:
@@ -902,47 +984,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
 {
 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

-	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
-
-	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-		.heapIndex = RADV_MEM_HEAP_VRAM,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-		.heapIndex = RADV_MEM_HEAP_GTT,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-		VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-		VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-		.heapIndex = RADV_MEM_HEAP_GTT,
-	};
-
-	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
-
-	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.vram_size -
-				physical_device->rad_info.vram_vis_size,
-		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-	};
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.vram_vis_size,
-		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-	};
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.gart_size,
-		.flags = 0,
-	};
+	*pMemoryProperties = physical_device->memory_properties;
 }

 void radv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -1958,10 +2000,6 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,

 		if (sem->temp_syncobj) {
 			counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
-			if (reset_temp) {
-				/* after we wait on a temp import - drop it */
-				sem->temp_syncobj = 0;
-			}
 		}
 		else if (sem->syncobj)
 			counts->syncobj[syncobj_idx++] = sem->syncobj;
@@ -1982,6 +2020,21 @@ void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
 	free(sem_info->signal.sem);
 }

+/* Destroy the temporary syncobj of each of the num_sems semaphores in sems and zero its handle. */
+static void radv_free_temp_syncobjs(struct radv_device *device,
+				    int num_sems,
+				    const VkSemaphore *sems)
+{
+	for (int i = 0; i < num_sems; i++) {
+		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
+
+		if (sem->temp_syncobj) {
+			device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
+			sem->temp_syncobj = 0;
+		}
+	}
+}
+
 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
 			     int num_wait_sems,
 			     const VkSemaphore *wait_sems,
@@ -2133,6 +2186,9 @@ VkResult radv_QueueSubmit(
 			}
 		}

+		radv_free_temp_syncobjs(queue->device,
+					pSubmits[i].waitSemaphoreCount,
+					pSubmits[i].pWaitSemaphores);
 		radv_free_sem_info(&sem_info);
 		free(cs_array);
 	}
@@ -2231,6 +2287,7 @@ VkResult radv_AllocateMemory(
 	VkResult result;
 	enum radeon_bo_domain domain;
 	uint32_t flags = 0;
+	enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];

 	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

@@ -2273,18 +2330,18 @@ VkResult radv_AllocateMemory(
 	}

 	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
-	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
+	if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
+	    mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
 		domain = RADEON_DOMAIN_GTT;
 	else
 		domain = RADEON_DOMAIN_VRAM;

-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
+	if (mem_type_index == RADV_MEM_TYPE_VRAM)
 		flags |= RADEON_FLAG_NO_CPU_ACCESS;
 	else
 		flags |= RADEON_FLAG_CPU_ACCESS;

-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+	if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
 		flags |= RADEON_FLAG_GTT_WC;

 	mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
@@ -2294,7 +2351,7 @@ VkResult radv_AllocateMemory(
 		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
 		goto fail;
 	}
-	mem->type_index = pAllocateInfo->memoryTypeIndex;
+	mem->type_index = mem_type_index;
 out_success:
 	*pMem = radv_device_memory_to_handle(mem);

@@ -2378,13 +2435,14 @@ VkResult radv_InvalidateMappedMemoryRanges(
 }

 void radv_GetBufferMemoryRequirements(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkBuffer                                    _buffer,
 	VkMemoryRequirements*                       pMemoryRequirements)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

-	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

 	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
 		pMemoryRequirements->alignment = 4096;
@@ -2418,13 +2476,14 @@ void radv_GetBufferMemoryRequirements2KHR(
 }

 void radv_GetImageMemoryRequirements(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkImage                                     _image,
 	VkMemoryRequirements*                       pMemoryRequirements)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_image, image, _image);

-	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

 	pMemoryRequirements->size = image->size;
 	pMemoryRequirements->alignment = image->alignment;
@@ -2811,7 +2870,7 @@ VkResult radv_CreateEvent(

 	event->bo = device->ws->buffer_create(device->ws, 8, 8,
 					      RADEON_DOMAIN_GTT,
-					      RADEON_FLAG_CPU_ACCESS);
+					      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS);
 	if (!event->bo) {
 		vk_free2(&device->alloc, pAllocator, event);
 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -3077,9 +3136,13 @@ radv_initialise_color_surface(struct radv_device *device,
 				    format != V_028C70_COLOR_24_8) |
 		S_028C70_NUMBER_TYPE(ntype) |
 		S_028C70_ENDIAN(endian);
-	if (iview->image->info.samples > 1)
-		if (iview->image->fmask.size)
-			cb->cb_color_info |= S_028C70_COMPRESSION(1);
+	if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
+		cb->cb_color_info |= S_028C70_COMPRESSION(1);
+		if (device->physical_device->rad_info.chip_class == SI) {
+			unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
+			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+		}
+	}

 	if (iview->image->cmask.size &&
 	    !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
@@ -3109,15 +3172,15 @@ radv_initialise_color_surface(struct radv_device *device,
 	}

 	if (device->physical_device->rad_info.chip_class >= GFX9) {
-		uint32_t max_slice = radv_surface_layer_count(iview);
-		unsigned mip0_depth = iview->base_layer + max_slice - 1;
+		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
+		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);

 		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
 		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
 			S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
-		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
-			S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
-			S_028C68_MAX_MIP(iview->image->info.levels);
+		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->extent.width - 1) |
+			S_028C68_MIP0_HEIGHT(iview->extent.height - 1) |
+			S_028C68_MAX_MIP(iview->image->info.levels - 1);

 		cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);

@@ -3246,6 +3309,8 @@ radv_initialise_ds_surface(struct radv_device *device,
 			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
 			tile_mode_index = si_tile_mode_index(iview->image, level, true);
 			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
+			if (stencil_only)
+				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
 		}

 		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
@@ -3584,6 +3649,7 @@ VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
 	uint32_t syncobj_handle = 0;
+	uint32_t *syncobj_dst = NULL;
 	assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);

 	int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
@@ -3591,10 +3657,15 @@ VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
 		return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;

 	if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
-		sem->temp_syncobj = syncobj_handle;
+		syncobj_dst = &sem->temp_syncobj;
 	} else {
-		sem->syncobj = syncobj_handle;
+		syncobj_dst = &sem->syncobj;
 	}
+
+	if (*syncobj_dst)
+		device->ws->destroy_syncobj(device->ws, *syncobj_dst);
+
+	*syncobj_dst = syncobj_handle;
 	close(pImportSemaphoreFdInfo->fd);
 	return VK_SUCCESS;
 }
@@ -3624,9 +3695,14 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
 	const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
 	VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
 {
-	pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
-	pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
-	pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
-		VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
-
+	if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
+		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
+			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
+	} else {
+		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+	}
 }
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -578,6 +578,10 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 			         VK_FORMAT_FEATURE_BLIT_DST_BIT;
 			tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
 			         VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
+
+			/* GFX9 doesn't support linear depth surfaces */
+			if (physical_device->rad_info.chip_class >= GFX9)
+				linear = 0;
 		}
 	} else {
 		bool linear_sampling;
@@ -958,6 +962,12 @@ bool radv_format_pack_clear_color(VkFormat format,
 		clear_vals[1] = ((uint16_t)util_iround(CLAMP(value->float32[2], 0.0f, 1.0f) * 0xffff)) & 0xffff;
 		clear_vals[1] |= ((uint16_t)util_iround(CLAMP(value->float32[3], 0.0f, 1.0f) * 0xffff)) << 16;
 		break;
+	case VK_FORMAT_R16G16B16A16_SNORM:
+		clear_vals[0] = ((uint16_t)util_iround(CLAMP(value->float32[0], -1.0f, 1.0f) * 0x7fff)) & 0xffff;
+		clear_vals[0] |= ((uint16_t)util_iround(CLAMP(value->float32[1], -1.0f, 1.0f) * 0x7fff)) << 16;
+		clear_vals[1] = ((uint16_t)util_iround(CLAMP(value->float32[2], -1.0f, 1.0f) * 0x7fff)) & 0xffff;
+		clear_vals[1] |= ((uint16_t)util_iround(CLAMP(value->float32[3], -1.0f, 1.0f) * 0x7fff)) << 16;
+		break;
 	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
 		clear_vals[0] = ((uint16_t)util_iround(CLAMP(value->float32[0], 0.0f, 1.0f) * 0x3ff)) & 0x3ff;
 		clear_vals[0] |= (((uint16_t)util_iround(CLAMP(value->float32[1], 0.0f, 1.0f) * 0x3ff)) & 0x3ff) << 10;
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -34,7 +34,7 @@
 #include "util/debug.h"
 #include "util/u_atomic.h"
 static unsigned
-radv_choose_tiling(struct radv_device *Device,
+radv_choose_tiling(struct radv_device *device,
 		   const struct radv_image_create_info *create_info)
 {
 	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
@@ -44,12 +44,17 @@ radv_choose_tiling(struct radv_device *Device,
 		return RADEON_SURF_MODE_LINEAR_ALIGNED;
 	}

-	/* Textures with a very small height are recommended to be linear. */
-	if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
-	    /* Only very thin and long 2D textures should benefit from
-	     * linear_aligned. */
-	    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
-		return RADEON_SURF_MODE_LINEAR_ALIGNED;
+	if (!vk_format_is_compressed(pCreateInfo->format) &&
+	    !vk_format_is_depth_or_stencil(pCreateInfo->format)
+	    && device->physical_device->rad_info.chip_class <= VI) {
+		/* Choosing linear for these small textures causes hangs in some VK CTS tests on GFX9, so it is limited to VI and older. */
+		/* Textures with a very small height are recommended to be linear. */
+		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
+		    /* Only very thin and long 2D textures should benefit from
+		     * linear_aligned. */
+		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
+			return RADEON_SURF_MODE_LINEAR_ALIGNED;
+	}

 	/* MSAA resources must be 2D tiled. */
 	if (pCreateInfo->samples > 1)
@@ -115,6 +120,7 @@ radv_init_surface(struct radv_device *device,
 	                           VK_IMAGE_USAGE_STORAGE_BIT)) ||
 	    (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
            (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
+            pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
            device->physical_device->rad_info.chip_class < VI ||
            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
            !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
@@ -181,6 +187,11 @@ radv_make_buffer_descriptor(struct radv_device *device,
 	state[0] = va;
 	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 		S_008F04_STRIDE(stride);
+
+	if (device->physical_device->rad_info.chip_class != VI && stride) {
+		range /= stride;
+	}
+
 	state[2] = range;
 	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
 		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
@@ -200,7 +211,6 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 {
 	uint64_t gpu_address = image->bo ? device->ws->buffer_get_va(image->bo) + image->offset : 0;
 	uint64_t va = gpu_address;
-	unsigned pitch = base_level_info->nblk_x * block_width;
 	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
 	uint64_t meta_va = 0;
 	if (chip_class >= GFX9) {
@@ -216,9 +226,6 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 		state[0] |= image->surface.u.legacy.tile_swizzle;
 	state[1] &= C_008F14_BASE_ADDRESS_HI;
 	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
-	state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
-							     is_stencil));
-	state[4] |= S_008F20_PITCH_GFX6(pitch - 1);

 	if (chip_class >= VI) {
 		state[6] &= C_008F28_COMPRESSION_EN;
@@ -274,10 +281,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 }

 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
-			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image)
+			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
 {
 	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
 		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
+
+	/* GFX9 allocates 1D textures as 2D. */
+	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
+		image_type = VK_IMAGE_TYPE_2D;
 	switch (image_type) {
 	case VK_IMAGE_TYPE_1D:
 		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
@@ -368,7 +379,7 @@ si_make_texture_descriptor(struct radv_device *device,
 	}

 	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
-			    is_storage_image);
+			    is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
 	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
 	        height = 1;
 		depth = image->info.array_size;
@@ -414,7 +425,7 @@ si_make_texture_descriptor(struct radv_device *device,
 		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
 		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
 					     util_logbase2(image->info.samples) :
-					     last_level);
+					     image->info.levels - 1);
 	} else {
 		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
 		state[4] |= S_008F20_DEPTH(depth - 1);
@@ -489,7 +500,7 @@ si_make_texture_descriptor(struct radv_device *device,
 			S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
-			S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false));
+			S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
 		fmask_state[4] = 0;
 		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
 		fmask_state[6] = 0;
@@ -554,10 +565,11 @@ radv_query_opaque_metadata(struct radv_device *device,
 	memcpy(&md->metadata[2], desc, sizeof(desc));

 	/* Dwords [10:..] contain the mipmap level offsets. */
-	for (i = 0; i <= image->info.levels - 1; i++)
-		md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
-
-	md->size_metadata = (11 + image->info.levels - 1) * 4;
+	if (device->physical_device->rad_info.chip_class <= VI) {
+		for (i = 0; i <= image->info.levels - 1; i++)
+			md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
+		md->size_metadata = (11 + image->info.levels - 1) * 4;
+	}
 }

 void
@@ -826,8 +838,10 @@ radv_image_create(VkDevice _device,

 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
 	    pCreateInfo->mipLevels == 1 &&
-	    !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc)
+	    !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc &&
+	    !image->surface.is_linear)
 		radv_image_alloc_cmask(device, image);
+
 	if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
 		radv_image_alloc_fmask(device, image);
 	} else if (vk_format_is_depth(pCreateInfo->format)) {
@@ -856,15 +870,15 @@ radv_image_create(VkDevice _device,
 static void
 radv_image_view_make_descriptor(struct radv_image_view *iview,
 				struct radv_device *device,
-				const VkImageViewCreateInfo* pCreateInfo,
+				const VkComponentMapping *components,
 				bool is_storage_image)
 {
-	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
-	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
+	struct radv_image *image = iview->image;
 	bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
 	uint32_t blk_w;
 	uint32_t *descriptor;
 	uint32_t *fmask_descriptor;
+	uint32_t hw_level = 0;

 	if (is_storage_image) {
 		descriptor = iview->storage_descriptor;
@@ -877,23 +891,32 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
 	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
 	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);

+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		hw_level = iview->base_mip;
 	si_make_texture_descriptor(device, image, is_storage_image,
 				   iview->type,
 				   iview->vk_format,
-				   &pCreateInfo->components,
-				   0, radv_get_levelCount(image, range) - 1,
-				   range->baseArrayLayer,
-				   range->baseArrayLayer + radv_get_layerCount(image, range) - 1,
+				   components,
+				   hw_level, hw_level + iview->level_count - 1,
+				   iview->base_layer,
+				   iview->base_layer + iview->layer_count - 1,
 				   iview->extent.width,
 				   iview->extent.height,
 				   iview->extent.depth,
 				   descriptor,
 				   fmask_descriptor);
+
+	const struct legacy_surf_level *base_level_info = NULL;
+	if (device->physical_device->rad_info.chip_class <= GFX9) {
+		if (is_stencil)
+			base_level_info = &image->surface.u.legacy.stencil_level[iview->base_mip];
+		else
+			base_level_info = &image->surface.u.legacy.level[iview->base_mip];
+	}
 	si_set_mutable_tex_desc_fields(device, image,
-				       is_stencil ? &image->surface.u.legacy.stencil_level[range->baseMipLevel]
-				                  : &image->surface.u.legacy.level[range->baseMipLevel],
-				       range->baseMipLevel,
-				       range->baseMipLevel,
+				       base_level_info,
+				       iview->base_mip,
+				       iview->base_mip,
 				       blk_w, is_stencil, descriptor);
 }

@@ -929,23 +952,34 @@ radv_image_view_init(struct radv_image_view *iview,
 		iview->vk_format = vk_format_depth_only(iview->vk_format);
 	}

-	iview->extent = (VkExtent3D) {
-		.width  = radv_minify(image->info.width , range->baseMipLevel),
-		.height = radv_minify(image->info.height, range->baseMipLevel),
-		.depth  = radv_minify(image->info.depth , range->baseMipLevel),
-	};
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		iview->extent = (VkExtent3D) {
+			.width = image->info.width,
+			.height = image->info.height,
+			.depth = image->info.depth,
+		};
+	} else {
+		iview->extent = (VkExtent3D) {
+			.width  = radv_minify(image->info.width , range->baseMipLevel),
+			.height = radv_minify(image->info.height, range->baseMipLevel),
+			.depth  = radv_minify(image->info.depth , range->baseMipLevel),
+		};
+	}

-	iview->extent.width = round_up_u32(iview->extent.width * vk_format_get_blockwidth(iview->vk_format),
-					   vk_format_get_blockwidth(image->vk_format));
-	iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
-					    vk_format_get_blockheight(image->vk_format));
+	if (iview->vk_format != image->vk_format) {
+		iview->extent.width = round_up_u32(iview->extent.width * vk_format_get_blockwidth(iview->vk_format),
+						   vk_format_get_blockwidth(image->vk_format));
+		iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
+						    vk_format_get_blockheight(image->vk_format));
+	}

 	iview->base_layer = range->baseArrayLayer;
 	iview->layer_count = radv_get_layerCount(image, range);
 	iview->base_mip = range->baseMipLevel;
+	iview->level_count = radv_get_levelCount(image, range);

-	radv_image_view_make_descriptor(iview, device, pCreateInfo, false);
-	radv_image_view_make_descriptor(iview, device, pCreateInfo, true);
+	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, false);
+	radv_image_view_make_descriptor(iview, device, &pCreateInfo->components, true);
 }

 bool radv_layout_has_htile(const struct radv_image *image,
@@ -1020,23 +1054,34 @@ radv_DestroyImage(VkDevice _device, VkImage _image,
 }

 void radv_GetImageSubresourceLayout(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkImage                                     _image,
 	const VkImageSubresource*                   pSubresource,
 	VkSubresourceLayout*                        pLayout)
 {
 	RADV_FROM_HANDLE(radv_image, image, _image);
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	int level = pSubresource->mipLevel;
 	int layer = pSubresource->arrayLayer;
 	struct radeon_surf *surface = &image->surface;

-	pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
-	pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
-	pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
-	pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
-	pLayout->size = surface->u.legacy.level[level].slice_size;
-	if (image->type == VK_IMAGE_TYPE_3D)
-		pLayout->size *= u_minify(image->info.depth, level);
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
+		pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
+		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
+		pLayout->size = surface->u.gfx9.surf_slice_size;
+		if (image->type == VK_IMAGE_TYPE_3D)
+			pLayout->size *= u_minify(image->info.depth, level);
+	} else {
+		pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
+		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
+		pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
+		pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
+		pLayout->size = surface->u.legacy.level[level].slice_size;
+		if (image->type == VK_IMAGE_TYPE_3D)
+			pLayout->size *= u_minify(image->info.depth, level);
+	}
 }


--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -477,48 +477,8 @@ radv_meta_build_nir_fs_noop(void)
 	return b.shader;
 }

-static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
-							    nir_ssa_def *input)
-{
-	nir_const_value v;
-	unsigned i;
-	v.u32[0] = 0x3b4d2e1c; // 0.00313080009
-
-	nir_ssa_def *cmp[3];
-	for (i = 0; i < 3; i++)
-		cmp[i] = nir_flt(b, nir_channel(b, input, i),
-				 nir_build_imm(b, 1, 32, v));
-
-	nir_ssa_def *ltvals[3];
-	v.f32[0] = 12.92;
-	for (i = 0; i < 3; i++)
-		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
-				     nir_build_imm(b, 1, 32, v));
-
-	nir_ssa_def *gtvals[3];
-
-	for (i = 0; i < 3; i++) {
-		v.f32[0] = 1.0/2.4;
-		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
-				     nir_build_imm(b, 1, 32, v));
-		v.f32[0] = 1.055;
-		gtvals[i] = nir_fmul(b, gtvals[i],
-				     nir_build_imm(b, 1, 32, v));
-		v.f32[0] = 0.055;
-		gtvals[i] = nir_fsub(b, gtvals[i],
-				     nir_build_imm(b, 1, 32, v));
-	}
-
-	nir_ssa_def *comp[4];
-	for (i = 0; i < 3; i++)
-		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
-	comp[3] = nir_channels(b, input, 3);
-	return nir_vec(b, comp, 4);
-}
-
 void radv_meta_build_resolve_shader_core(nir_builder *b,
 					 bool is_integer,
-					 bool is_srgb,
 					 int samples,
 					 nir_variable *input_img,
 					 nir_variable *color,
@@ -596,10 +556,4 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,

 	if (outer_if)
 		b->cursor = nir_after_cf_node(&outer_if->cf_node);
-
-	if (is_srgb) {
-		nir_ssa_def *newv = nir_load_var(b, color);
-		newv = radv_meta_build_resolve_srgb_conversion(b, newv);
-		nir_store_var(b, color, newv, 0xf);
-	}
 }
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -234,7 +234,6 @@ nir_shader *radv_meta_build_nir_fs_noop(void);

 void radv_meta_build_resolve_shader_core(nir_builder *b,
 					 bool is_integer,
-					 bool is_srgb,
 					 int samples,
 					 nir_variable *input_img,
 					 nir_variable *color,
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -269,21 +269,26 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               VkOffset3D src_offset_1,
               struct radv_image *dest_image,
               struct radv_image_view *dest_iview,
-               VkOffset3D dest_offset_0,
-               VkOffset3D dest_offset_1,
+               VkOffset2D dest_offset_0,
+               VkOffset2D dest_offset_1,
               VkRect2D dest_box,
               VkFilter blit_filter)
 {
 	struct radv_device *device = cmd_buffer->device;
+	uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
+	uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
+	uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
+	uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
+	uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);

 	assert(src_image->info.samples == dest_image->info.samples);

 	float vertex_push_constants[5] = {
-		(float)src_offset_0.x / (float)src_iview->extent.width,
-		(float)src_offset_0.y / (float)src_iview->extent.height,
-		(float)src_offset_1.x / (float)src_iview->extent.width,
-		(float)src_offset_1.y / (float)src_iview->extent.height,
-		(float)src_offset_0.z / (float)src_iview->extent.depth,
+		(float)src_offset_0.x / (float)src_width,
+		(float)src_offset_0.y / (float)src_height,
+		(float)src_offset_1.x / (float)src_width,
+		(float)src_offset_1.y / (float)src_height,
+		(float)src_offset_0.z / (float)src_depth,
 	};

 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
@@ -310,8 +315,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 					       .pAttachments = (VkImageView[]) {
 					       radv_image_view_to_handle(dest_iview),
 				       },
-				       .width = dest_iview->extent.width,
-				       .height = dest_iview->extent.height,
+				       .width = dst_width,
+				       .height = dst_height,
 				       .layers = 1,
 				}, &cmd_buffer->pool->alloc, &fb);
 	VkPipeline pipeline;
@@ -512,21 +517,6 @@ void radv_CmdBlitImage(
 	for (unsigned r = 0; r < regionCount; r++) {
 		const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
 		const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
-		struct radv_image_view src_iview;
-		radv_image_view_init(&src_iview, cmd_buffer->device,
-				     &(VkImageViewCreateInfo) {
-					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-						     .image = srcImage,
-						     .viewType = radv_meta_get_view_type(src_image),
-						     .format = src_image->vk_format,
-						     .subresourceRange = {
-						     .aspectMask = src_res->aspectMask,
-						     .baseMipLevel = src_res->mipLevel,
-						     .levelCount = 1,
-						     .baseArrayLayer = src_res->baseArrayLayer,
-						     .layerCount = 1
-					     },
-				     });

 		unsigned dst_start, dst_end;
 		if (dest_image->type == VK_IMAGE_TYPE_3D) {
@@ -573,18 +563,17 @@ void radv_CmdBlitImage(
 		dest_box.extent.width = abs(dst_x1 - dst_x0);
 		dest_box.extent.height = abs(dst_y1 - dst_y0);

-		struct radv_image_view dest_iview;
 		const unsigned num_layers = dst_end - dst_start;
 		for (unsigned i = 0; i < num_layers; i++) {
-			const VkOffset3D dest_offset_0 = {
+			struct radv_image_view dest_iview, src_iview;
+
+			const VkOffset2D dest_offset_0 = {
 				.x = dst_x0,
 				.y = dst_y0,
-				.z = dst_start + i ,
 			};
-			const VkOffset3D dest_offset_1 = {
+			const VkOffset2D dest_offset_1 = {
 				.x = dst_x1,
 				.y = dst_y1,
-				.z = dst_start + i ,
 			};
 			VkOffset3D src_offset_0 = {
 				.x = src_x0,
@@ -596,9 +585,10 @@ void radv_CmdBlitImage(
 				.y = src_y1,
 				.z = src_start + i * src_z_step,
 			};
-			const uint32_t dest_array_slice =
-				radv_meta_get_iview_layer(dest_image, dst_res,
-							  &dest_offset_0);
+			const uint32_t dest_array_slice = dst_start + i;
+
+			/* 3D images have just 1 layer */
+			const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;

 			radv_image_view_init(&dest_iview, cmd_buffer->device,
 					     &(VkImageViewCreateInfo) {
@@ -614,6 +604,20 @@ void radv_CmdBlitImage(
 							     .layerCount = 1
 						     },
 					     });
+			radv_image_view_init(&src_iview, cmd_buffer->device,
+					     &(VkImageViewCreateInfo) {
+						.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+							.image = srcImage,
+							.viewType = radv_meta_get_view_type(src_image),
+							.format = src_image->vk_format,
+							.subresourceRange = {
+							.aspectMask = src_res->aspectMask,
+							.baseMipLevel = src_res->mipLevel,
+							.levelCount = 1,
+							.baseArrayLayer = src_array_slice,
+							.layerCount = 1
+						},
+					});
 			meta_emit_blit(cmd_buffer,
 				       src_image, &src_iview,
 				       src_offset_0, src_offset_1,
@@ -695,6 +699,8 @@ static VkFormat pipeline_formats[] = {
   VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_A2R10G10B10_UINT_PACK32,
+   VK_FORMAT_A2R10G10B10_SINT_PACK32,
   VK_FORMAT_R16G16B16A16_UNORM,
   VK_FORMAT_R16G16B16A16_SNORM,
   VK_FORMAT_R16G16B16A16_UINT,
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -53,7 +53,8 @@ enum blit2d_src_type {
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
             struct radv_meta_blit2d_surf *surf,
-             struct radv_image_view *iview, VkFormat depth_format)
+             struct radv_image_view *iview, VkFormat depth_format,
+              VkImageAspectFlagBits aspects)
 {
 	VkFormat format;

@@ -69,7 +70,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
 					     .viewType = VK_IMAGE_VIEW_TYPE_2D,
 					     .format = format,
 					     .subresourceRange = {
-					     .aspectMask = surf->aspect_mask,
+					     .aspectMask = aspects,
 					     .baseMipLevel = surf->level,
 					     .levelCount = 1,
 					     .baseArrayLayer = surf->layer,
@@ -111,7 +112,8 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
                struct radv_meta_blit2d_surf *src_img,
                struct radv_meta_blit2d_buffer *src_buf,
                struct blit2d_src_temps *tmp,
-                enum blit2d_src_type src_type, VkFormat depth_format)
+                enum blit2d_src_type src_type, VkFormat depth_format,
+                VkImageAspectFlagBits aspects)
 {
 	struct radv_device *device = cmd_buffer->device;

@@ -138,7 +140,7 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
 				      VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
 				      &src_buf->pitch);
 	} else {
-		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format);
+		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);

 		radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
 					      device->meta_state.blit2d.p_layouts[src_type],
@@ -175,9 +177,10 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
                uint32_t width,
                uint32_t height,
 		VkFormat depth_format,
-                struct blit2d_dst_temps *tmp)
+                struct blit2d_dst_temps *tmp,
+                VkImageAspectFlagBits aspects)
 {
-	create_iview(cmd_buffer, dst, &tmp->iview, depth_format);
+	create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);

 	radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
 			       &(VkFramebufferCreateInfo) {
@@ -250,106 +253,111 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;

 	for (unsigned r = 0; r < num_rects; ++r) {
-		VkFormat depth_format = 0;
-		if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
-			depth_format = vk_format_stencil_only(dst->image->vk_format);
-		else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
-			depth_format = vk_format_depth_only(dst->image->vk_format);
-		struct blit2d_src_temps src_temps;
-		blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);
+		unsigned i;
+		for_each_bit(i, dst->aspect_mask) {
+			unsigned aspect_mask = 1u << i;
+			VkFormat depth_format = 0;
+			if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+				depth_format = vk_format_stencil_only(dst->image->vk_format);
+			else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
+				depth_format = vk_format_depth_only(dst->image->vk_format);
+			struct blit2d_src_temps src_temps;
+			blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, aspect_mask);

-		struct blit2d_dst_temps dst_temps;
-		blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
-				rects[r].dst_y + rects[r].height, depth_format, &dst_temps);
+			struct blit2d_dst_temps dst_temps;
+			blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
+					rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);

-		float vertex_push_constants[4] = {
-			rects[r].src_x,
-			rects[r].src_y,
-			rects[r].src_x + rects[r].width,
-			rects[r].src_y + rects[r].height,
-		};
+			float vertex_push_constants[4] = {
+				rects[r].src_x,
+				rects[r].src_y,
+				rects[r].src_x + rects[r].width,
+				rects[r].src_y + rects[r].height,
+			};

-		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-				      device->meta_state.blit2d.p_layouts[src_type],
-				      VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
-				      vertex_push_constants);
+			radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+					device->meta_state.blit2d.p_layouts[src_type],
+					VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
+					vertex_push_constants);

-		if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-			unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
+			if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
+				unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);

-			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-						      &(VkRenderPassBeginInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.blit2d.render_passes[fs_key],
-								      .framebuffer = dst_temps.fb,
-								      .renderArea = {
-								      .offset = { rects[r].dst_x, rects[r].dst_y, },
-								      .extent = { rects[r].width, rects[r].height },
-							      },
-								      .clearValueCount = 0,
-									       .pClearValues = NULL,
-									       }, VK_SUBPASS_CONTENTS_INLINE);
+				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+							&(VkRenderPassBeginInfo) {
+								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+									.renderPass = device->meta_state.blit2d.render_passes[fs_key],
+									.framebuffer = dst_temps.fb,
+									.renderArea = {
+									.offset = { rects[r].dst_x, rects[r].dst_y, },
+									.extent = { rects[r].width, rects[r].height },
+								},
+									.clearValueCount = 0,
+										.pClearValues = NULL,
+										}, VK_SUBPASS_CONTENTS_INLINE);


-			bind_pipeline(cmd_buffer, src_type, fs_key);
-		} else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
-			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-						      &(VkRenderPassBeginInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.blit2d.depth_only_rp,
-								      .framebuffer = dst_temps.fb,
-								      .renderArea = {
-								      .offset = { rects[r].dst_x, rects[r].dst_y, },
-								      .extent = { rects[r].width, rects[r].height },
-							      },
-								      .clearValueCount = 0,
-									       .pClearValues = NULL,
-									       }, VK_SUBPASS_CONTENTS_INLINE);
+				bind_pipeline(cmd_buffer, src_type, fs_key);
+			} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+							&(VkRenderPassBeginInfo) {
+								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+									.renderPass = device->meta_state.blit2d.depth_only_rp,
+									.framebuffer = dst_temps.fb,
+									.renderArea = {
+									.offset = { rects[r].dst_x, rects[r].dst_y, },
+									.extent = { rects[r].width, rects[r].height },
+								},
+									.clearValueCount = 0,
+										.pClearValues = NULL,
+										}, VK_SUBPASS_CONTENTS_INLINE);


-			bind_depth_pipeline(cmd_buffer, src_type);
+				bind_depth_pipeline(cmd_buffer, src_type);

-		} else if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
-			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-						      &(VkRenderPassBeginInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.blit2d.stencil_only_rp,
-								      .framebuffer = dst_temps.fb,
-								      .renderArea = {
-								      .offset = { rects[r].dst_x, rects[r].dst_y, },
-								      .extent = { rects[r].width, rects[r].height },
-							      },
-								      .clearValueCount = 0,
-									       .pClearValues = NULL,
-									       }, VK_SUBPASS_CONTENTS_INLINE);
+			} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+							&(VkRenderPassBeginInfo) {
+								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+									.renderPass = device->meta_state.blit2d.stencil_only_rp,
+									.framebuffer = dst_temps.fb,
+									.renderArea = {
+									.offset = { rects[r].dst_x, rects[r].dst_y, },
+									.extent = { rects[r].width, rects[r].height },
+								},
+									.clearValueCount = 0,
+										.pClearValues = NULL,
+										}, VK_SUBPASS_CONTENTS_INLINE);


-			bind_stencil_pipeline(cmd_buffer, src_type);
+				bind_stencil_pipeline(cmd_buffer, src_type);
+			} else
+				unreachable("Processing blit2d with multiple aspects.");
+
+			radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
+				.x = rects[r].dst_x,
+				.y = rects[r].dst_y,
+				.width = rects[r].width,
+				.height = rects[r].height,
+				.minDepth = 0.0f,
+				.maxDepth = 1.0f
+			});
+
+			radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
+				.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
+				.extent = (VkExtent2D) { rects[r].width, rects[r].height },
+			});
+
+
+
+			radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+			radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+
+			/* At the point where we emit the draw call, all data from the
+			 * descriptor sets, etc. has been used.  We are free to delete it.
+			 */
+			blit2d_unbind_dst(cmd_buffer, &dst_temps);
 		}
-
-		radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
-			.x = rects[r].dst_x,
-			.y = rects[r].dst_y,
-			.width = rects[r].width,
-			.height = rects[r].height,
-			.minDepth = 0.0f,
-			.maxDepth = 1.0f
-		});
-
-		radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
-			.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
-			.extent = (VkExtent2D) { rects[r].width, rects[r].height },
-		});
-
-
-
-		radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
-		radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
-
-		/* At the point where we emit the draw call, all data from the
-		 * descriptor sets, etc. has been used.  We are free to delete it.
-		 */
-		blit2d_unbind_dst(cmd_buffer, &dst_temps);
 	}
 }

@@ -1134,6 +1142,8 @@ static VkFormat pipeline_formats[] = {
   VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_A2R10G10B10_UINT_PACK32,
+   VK_FORMAT_A2R10G10B10_SINT_PACK32,
   VK_FORMAT_R16G16B16A16_UNORM,
   VK_FORMAT_R16G16B16A16_SNORM,
   VK_FORMAT_R16G16B16A16_UINT,
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -754,6 +754,8 @@ static VkFormat pipeline_formats[] = {
 	VK_FORMAT_R8G8B8A8_UNORM,
 	VK_FORMAT_R8G8B8A8_UINT,
 	VK_FORMAT_R8G8B8A8_SINT,
+	VK_FORMAT_A2R10G10B10_UINT_PACK32,
+	VK_FORMAT_A2R10G10B10_SINT_PACK32,
 	VK_FORMAT_R16G16B16A16_UNORM,
 	VK_FORMAT_R16G16B16A16_SNORM,
 	VK_FORMAT_R16G16B16A16_UINT,
@@ -977,7 +979,7 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 	if (iview->image->info.levels > 1)
 		goto fail;

-	if (iview->image->surface.u.legacy.level[0].mode < RADEON_SURF_MODE_1D)
+	if (iview->image->surface.is_linear)
 		goto fail;
 	if (!radv_image_extent_compare(iview->image, &iview->extent))
 		goto fail;
@@ -1174,6 +1176,9 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 {
 	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	struct radv_image_view iview;
+	uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
+	uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
+
 	radv_image_view_init(&iview, cmd_buffer->device,
 			     &(VkImageViewCreateInfo) {
 				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -1197,9 +1202,9 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 					       .pAttachments = (VkImageView[]) {
 					       radv_image_view_to_handle(&iview),
 				       },
-					       .width = iview.extent.width,
-							.height = iview.extent.height,
-							.layers = 1
+					       .width = width,
+					       .height = height,
+					       .layers = 1
 			       },
 			       &cmd_buffer->pool->alloc,
 			       &fb);
@@ -1255,8 +1260,8 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 						.renderArea = {
 						.offset = { 0, 0, },
 						.extent = {
-							.width = iview.extent.width,
-							.height = iview.extent.height,
+							.width = width,
+							.height = height,
 						},
 					},
 						.renderPass = pass,
@@ -1275,7 +1280,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 	VkClearRect clear_rect = {
 		.rect = {
 			.offset = { 0, 0 },
-			.extent = { iview.extent.width, iview.extent.height },
+			.extent = { width, height },
 		},
 		.baseArrayLayer = range->baseArrayLayer,
 		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -29,7 +29,9 @@
 #include "sid.h"

 static VkResult
-create_pass(struct radv_device *device)
+create_pass(struct radv_device *device,
+	    uint32_t samples,
+	    VkRenderPass *pass)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -37,7 +39,7 @@ create_pass(struct radv_device *device)
 	VkAttachmentDescription attachment;

 	attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
-	attachment.samples = 1;
+	attachment.samples = samples;
 	attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
 	attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
 	attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
@@ -65,14 +67,18 @@ create_pass(struct radv_device *device)
 								.dependencyCount = 0,
 								   },
 				       alloc,
-				       &device->meta_state.depth_decomp.pass);
+				       pass);

 	return result;
 }

 static VkResult
 create_pipeline(struct radv_device *device,
-                VkShaderModule vs_module_h)
+                VkShaderModule vs_module_h,
+		uint32_t samples,
+		VkRenderPass pass,
+		VkPipeline *decompress_pipeline,
+		VkPipeline *resummarize_pipeline)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -129,7 +135,7 @@ create_pipeline(struct radv_device *device,
 		},
 		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-			.rasterizationSamples = 1,
+			.rasterizationSamples = samples,
 			.sampleShadingEnable = false,
 			.pSampleMask = NULL,
 			.alphaToCoverageEnable = false,
@@ -156,7 +162,7 @@ create_pipeline(struct radv_device *device,
 				VK_DYNAMIC_STATE_SCISSOR,
 			},
 		},
-		.renderPass = device->meta_state.depth_decomp.pass,
+		.renderPass = pass,
 		.subpass = 0,
 	};

@@ -169,7 +175,7 @@ create_pipeline(struct radv_device *device,
 							.db_flush_stencil_inplace = true,
 					       },
 					       &device->meta_state.alloc,
-					       &device->meta_state.depth_decomp.decompress_pipeline);
+					       decompress_pipeline);
 	if (result != VK_SUCCESS)
 		goto cleanup;

@@ -183,7 +189,7 @@ create_pipeline(struct radv_device *device,
 							.db_resummarize = true,
 					       },
 					       &device->meta_state.alloc,
-					       &device->meta_state.depth_decomp.resummarize_pipeline);
+					       resummarize_pipeline);
 	if (result != VK_SUCCESS)
 		goto cleanup;

@@ -199,29 +205,31 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
 {
 	struct radv_meta_state *state = &device->meta_state;
 	VkDevice device_h = radv_device_to_handle(device);
-	VkRenderPass pass_h = device->meta_state.depth_decomp.pass;
 	const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

-	if (pass_h)
-		radv_DestroyRenderPass(device_h, pass_h,
-					     &device->meta_state.alloc);
-
-	VkPipeline pipeline_h = state->depth_decomp.decompress_pipeline;
-	if (pipeline_h) {
-		radv_DestroyPipeline(device_h, pipeline_h, alloc);
-	}
-	pipeline_h = state->depth_decomp.resummarize_pipeline;
-	if (pipeline_h) {
-		radv_DestroyPipeline(device_h, pipeline_h, alloc);
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+		VkRenderPass pass_h = state->depth_decomp[i].pass;
+		if (pass_h) {
+			radv_DestroyRenderPass(device_h, pass_h, alloc);
+		}
+		VkPipeline pipeline_h = state->depth_decomp[i].decompress_pipeline;
+		if (pipeline_h) {
+			radv_DestroyPipeline(device_h, pipeline_h, alloc);
+		}
+		pipeline_h = state->depth_decomp[i].resummarize_pipeline;
+		if (pipeline_h) {
+			radv_DestroyPipeline(device_h, pipeline_h, alloc);
+		}
 	}
 }

 VkResult
 radv_device_init_meta_depth_decomp_state(struct radv_device *device)
 {
+	struct radv_meta_state *state = &device->meta_state;
 	VkResult res = VK_SUCCESS;

-	zero(device->meta_state.depth_decomp);
+	zero(state->depth_decomp);

 	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
 	if (!vs_module.nir) {
@@ -230,14 +238,22 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
 		goto fail;
 	}

-	res = create_pass(device);
-	if (res != VK_SUCCESS)
-		goto fail;
-
 	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
-	res = create_pipeline(device, vs_module_h);
-	if (res != VK_SUCCESS)
-		goto fail;
+
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+		uint32_t samples = 1 << i;
+
+		res = create_pass(device, samples, &state->depth_decomp[i].pass);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_pipeline(device, vs_module_h, samples,
+				      state->depth_decomp[i].pass,
+				      &state->depth_decomp[i].decompress_pipeline,
+				      &state->depth_decomp[i].resummarize_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+	}

 	goto cleanup;

@@ -283,10 +299,15 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
 }


+enum radv_depth_op {
+	DEPTH_DECOMPRESS,	/* run depth_decomp[samples_log2].decompress_pipeline */
+	DEPTH_RESUMMARIZE,	/* run depth_decomp[samples_log2].resummarize_pipeline */
+};
+
 static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					     struct radv_image *image,
 					     VkImageSubresourceRange *subresourceRange,
-					     VkPipeline pipeline_h)
+					     enum radv_depth_op op)
 {
 	struct radv_meta_saved_state saved_state;
 	struct radv_meta_saved_pass_state saved_pass_state;
@@ -296,6 +317,9 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 				     subresourceRange->baseMipLevel);
 	uint32_t height = radv_minify(image->info.height,
 				     subresourceRange->baseMipLevel);
+	uint32_t samples = image->info.samples;
+	uint32_t samples_log2 = ffs(samples) - 1;
+	struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;

 	if (!image->surface.htile_size)
 		return;
@@ -339,7 +363,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 		radv_CmdBeginRenderPass(cmd_buffer_h,
 					      &(VkRenderPassBeginInfo) {
 						      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-							      .renderPass = cmd_buffer->device->meta_state.depth_decomp.pass,
+							      .renderPass = meta_state->depth_decomp[samples_log2].pass,
 							      .framebuffer = fb_h,
 							      .renderArea = {
 							      .offset = {
@@ -356,6 +380,18 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					   },
 					   VK_SUBPASS_CONTENTS_INLINE);

+		VkPipeline pipeline_h;
+		switch (op) {
+		case DEPTH_DECOMPRESS:
+			pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
+			break;
+		case DEPTH_RESUMMARIZE:
+			pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
+			break;
+		default:
+			unreachable("unknown operation");
+		}
+
 		emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h);
 		radv_CmdEndRenderPass(cmd_buffer_h);

@@ -371,8 +407,7 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 VkImageSubresourceRange *subresourceRange)
 {
 	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
-					 cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
+	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_DECOMPRESS);
 }

 void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
@@ -380,6 +415,5 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 VkImageSubresourceRange *subresourceRange)
 {
 	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
-					 cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
+	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_RESUMMARIZE);
 }
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -382,6 +382,11 @@ void radv_CmdResolveImage(
 	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	assert(src_image->info.samples > 1);
+	if (src_image->info.samples <= 1) {
+		/* Invalid usage: going on would hang the GPU. NOTE(review): returns without restoring saved_state - confirm. */
+		fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.\n");
+		return;
+	}
 	assert(dest_image->info.samples == 1);

 	if (src_image->info.samples >= 16) {
@@ -607,13 +612,6 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)

 		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);

-		/* Subpass resolves must respect the render area. We can ignore the
-		 * render area here because vkCmdBeginRenderPass set the render area
-		 * with 3DSTATE_DRAWING_RECTANGLE.
-		 *
-		 * XXX(chadv): Does the hardware really respect
-		 * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
-		 */
 		emit_resolve(cmd_buffer,
 			     &(VkOffset2D) { 0, 0 },
 			     &(VkExtent2D) { fb->width, fb->height });
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -31,6 +31,45 @@
 #include "sid.h"
 #include "vk_format.h"

+static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
+							    nir_ssa_def *input)
+{
+	nir_const_value v;
+	unsigned i;
+	v.u32[0] = 0x3b4d2e1c; // 0.00313080009 as float bits: cutoff between the two curve segments
+	/* Emits, per RGB channel: x < cutoff ? 12.92 * x : 1.055 * x^(1/2.4) - 0.055. */
+	nir_ssa_def *cmp[3];
+	for (i = 0; i < 3; i++)
+		cmp[i] = nir_flt(b, nir_channel(b, input, i),
+				 nir_build_imm(b, 1, 32, v));
+	/* Candidate for channels below the cutoff: 12.92 * x. */
+	nir_ssa_def *ltvals[3];
+	v.f32[0] = 12.92;
+	for (i = 0; i < 3; i++)
+		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *gtvals[3];
+	/* Candidate for the remaining channels: 1.055 * x^(1/2.4) - 0.055. */
+	for (i = 0; i < 3; i++) {
+		v.f32[0] = 1.0/2.4;
+		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 1.055;
+		gtvals[i] = nir_fmul(b, gtvals[i],
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 0.055;
+		gtvals[i] = nir_fsub(b, gtvals[i],
+				     nir_build_imm(b, 1, 32, v));
+	}
+	/* Select per channel; channel 3 (alpha) is taken from the input unmodified. */
+	nir_ssa_def *comp[4];
+	for (i = 0; i < 3; i++)
+		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
+	comp[3] = nir_channels(b, input, 1 << 3);
+	return nir_vec(b, comp, 4);
+}
+
 static nir_shader *
 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
 {
@@ -88,10 +127,13 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
 	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
 	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

-	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
-					    input_img, color, img_coord);
+	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
+	                                    color, img_coord);

 	nir_ssa_def *outval = nir_load_var(&b, color);
+	if (is_srgb)
+		outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
+
 	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
 	store->src[0] = nir_src_for_ssa(coord);
@@ -402,7 +444,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 							     .image = radv_image_to_handle(dest_image),
 							     .viewType = radv_meta_get_view_type(dest_image),
-							     .format = dest_image->vk_format,
+							     .format = vk_to_non_srgb_format(dest_image->vk_format),
 							     .subresourceRange = {
 							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 							     .baseMipLevel = region->dstSubresource.mipLevel,
@@ -479,21 +521,6 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
 			continue;

-		struct radv_subpass resolve_subpass = {
-			.color_count = 1,
-			.color_attachments = (VkAttachmentReference[]) { dest_att },
-			.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
-		};
-
-		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
-
-		/* Subpass resolves must respect the render area. We can ignore the
-		 * render area here because vkCmdBeginRenderPass set the render area
-		 * with 3DSTATE_DRAWING_RECTANGLE.
-		 *
-		 * XXX(chadv): Does the hardware really respect
-		 * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
-		 */
 		emit_resolve(cmd_buffer,
 			     src_iview,
 			     dst_iview,
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -51,7 +51,7 @@ build_nir_vertex_shader(void)
 }

 static nir_shader *
-build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
+build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
 {
 	nir_builder b;
 	char name[64];
@@ -62,7 +62,7 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_
 								 false,
 								 GLSL_TYPE_FLOAT);

-	snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
+	snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 	b.shader->info.name = ralloc_strdup(b.shader, name);

@@ -92,8 +92,8 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_
 	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
 	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

-	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb,samples,
-					    input_img, color, img_coord);
+	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
+	                                    color, img_coord);

 	nir_ssa_def *outval = nir_load_var(&b, color);
 	nir_store_var(&b, color_out, outval, 0xf);
@@ -160,6 +160,8 @@ static VkFormat pipeline_formats[] = {
   VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_A2R10G10B10_UINT_PACK32,
+   VK_FORMAT_A2R10G10B10_SINT_PACK32,
   VK_FORMAT_R16G16B16A16_UNORM,
   VK_FORMAT_R16G16B16A16_SNORM,
   VK_FORMAT_R16G16B16A16_UINT,
@@ -175,31 +177,25 @@ create_resolve_pipeline(struct radv_device *device,
 			VkFormat format)
 {
 	VkResult result;
-	bool is_integer = false, is_srgb = false;
+	bool is_integer = false;
 	uint32_t samples = 1 << samples_log2;
 	unsigned fs_key = radv_format_meta_fs_key(format);
 	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
 	vi_create_info = &normal_vi_create_info;
 	if (vk_format_is_int(format))
 		is_integer = true;
-	else if (vk_format_is_srgb(format))
-		is_srgb = true;

 	struct radv_shader_module fs = { .nir = NULL };
-	fs.nir = build_resolve_fragment_shader(device, is_integer, is_srgb, samples);
+	fs.nir = build_resolve_fragment_shader(device, is_integer, samples);
 	struct radv_shader_module vs = {
 		.nir = build_nir_vertex_shader(),
 	};

-	VkRenderPass *rp = is_srgb ?
-		&device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
-		&device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+	VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];

 	assert(!*rp);

-	VkPipeline *pipeline = is_srgb ?
-		&device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
-		&device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+	VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
 	assert(!*pipeline);

 	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -348,8 +344,6 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
 		for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
 			res = create_resolve_pipeline(device, i, pipeline_formats[j]);
 		}
-
-		res = create_resolve_pipeline(device, i, VK_FORMAT_R8G8B8A8_SRGB);
 	}

 	return res;
@@ -368,12 +362,6 @@ radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
 					     state->resolve_fragment.rc[i].pipeline[j],
 					     &state->alloc);
 		}
-		radv_DestroyRenderPass(radv_device_to_handle(device),
-				       state->resolve_fragment.rc[i].srgb_render_pass,
-					       &state->alloc);
-		radv_DestroyPipeline(radv_device_to_handle(device),
-				     state->resolve_fragment.rc[i].srgb_pipeline,
-				     &state->alloc);
 	}

 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
@@ -430,9 +418,7 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 			      push_constants);

 	unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format);
-	VkPipeline pipeline_h = vk_format_is_srgb(dest_iview->vk_format) ?
-		device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
-		device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+	VkPipeline pipeline_h = device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];

 	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
 			     pipeline_h);
@@ -483,9 +469,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
 		radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
 	}

-	rp = vk_format_is_srgb(dest_image->vk_format) ?
-		device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
-		device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+	rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
 	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	for (uint32_t r = 0; r < region_count; ++r) {
@@ -649,13 +633,6 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)

 		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);

-		/* Subpass resolves must respect the render area. We can ignore the
-		 * render area here because vkCmdBeginRenderPass set the render area
-		 * with 3DSTATE_DRAWING_RECTANGLE.
-		 *
-		 * XXX(chadv): Does the hardware really respect
-		 * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
-		 */
 		emit_resolve(cmd_buffer,
 			     src_iview,
 			     dest_iview,
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -65,6 +65,7 @@ static const struct nir_shader_compiler_options nir_options = {
 	.lower_unpack_unorm_4x8 = true,
 	.lower_extract_byte = true,
 	.lower_extract_word = true,
+	.lower_ffma = true,
 	.max_unroll_iterations = 32
 };

@@ -161,6 +162,7 @@ radv_optimize_nir(struct nir_shader *shader)
                if (nir_opt_trivial_continues(shader)) {
                        progress = true;
                        NIR_PASS(progress, shader, nir_copy_prop);
+                        NIR_PASS(progress, shader, nir_opt_remove_phis);
                        NIR_PASS(progress, shader, nir_opt_dce);
                }
                NIR_PASS(progress, shader, nir_opt_if);
@@ -273,8 +275,32 @@ radv_shader_compile_to_nir(struct radv_device *device,

 	nir_shader_gather_info(nir, entry_point->impl);

+	/* While it would be nice not to have this flag, we are constrained
+	 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
+	 * on GFX9.
+	 */
+	bool llvm_has_working_vgpr_indexing =
+		device->physical_device->rad_info.chip_class <= VI;
+
+	/* TODO: Indirect indexing of GS inputs is unimplemented.
+	 *
+	 * TCS and TES load inputs directly from LDS or offchip memory, so
+	 * indirect indexing is trivial.
+	 */
 	nir_variable_mode indirect_mask = 0;
 	indirect_mask |= nir_var_shader_in;
+
+	if (!llvm_has_working_vgpr_indexing &&
+	    nir->info.stage != MESA_SHADER_TESS_CTRL)
+		indirect_mask |= nir_var_shader_out;
+
+	/* TODO: We shouldn't need to do this; however, LLVM isn't currently
+	 * smart enough to handle indirects without excessive spilling, which
+	 * causes the GPU to hang.
+	 *
+	 * See the following thread for more details of the problem:
+	 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+	 */
 	indirect_mask |= nir_var_local;

 	nir_lower_indirect_derefs(nir, indirect_mask);
@@ -852,6 +878,79 @@ static uint32_t si_translate_blend_factor(VkBlendFactor factor)
 	}
 }

+static uint32_t si_translate_blend_opt_function(VkBlendOp op) /* map a VkBlendOp to its V_028760_OPT_COMB_* value */
+{
+	switch (op) {
+	case VK_BLEND_OP_ADD:
+		return V_028760_OPT_COMB_ADD;
+	case VK_BLEND_OP_SUBTRACT:
+		return V_028760_OPT_COMB_SUBTRACT;
+	case VK_BLEND_OP_REVERSE_SUBTRACT:
+		return V_028760_OPT_COMB_REVSUBTRACT;
+	case VK_BLEND_OP_MIN:
+		return V_028760_OPT_COMB_MIN;
+	case VK_BLEND_OP_MAX:
+		return V_028760_OPT_COMB_MAX;
+	default: /* any op not listed above is reported as "blend disabled" */
+		return V_028760_OPT_COMB_BLEND_DISABLED;
+	}
+}
+
+static uint32_t si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha) /* map a blend factor to a V_028760_BLEND_OPT_* value */
+{
+	switch (factor) {
+	case VK_BLEND_FACTOR_ZERO:
+		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
+	case VK_BLEND_FACTOR_ONE:
+		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
+	case VK_BLEND_FACTOR_SRC_COLOR: /* is_alpha picks the A1/A0 variant instead of C1/C0 */
+		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
+				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+	case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: /* same split, with the 0/1 roles swapped */
+		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
+				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+	case VK_BLEND_FACTOR_SRC_ALPHA: /* result does not depend on is_alpha */
+		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
+	case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: /* result does not depend on is_alpha */
+		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
+	case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
+				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+	default: /* every other factor gets the no-preserve / no-ignore setting */
+		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+	}
+}
+
+/**
+ * Get rid of DST in the blend factors by commuting the operands (in place):
+ *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+static void si_blend_remove_dst(unsigned *func, unsigned *src_factor,
+				unsigned *dst_factor, unsigned expected_dst, /* DST factor looked for in *src_factor */
+				unsigned replacement_src) /* SRC factor written to *dst_factor on a match */
+{
+	if (*src_factor == expected_dst && /* nothing is modified unless both conditions hold */
+	    *dst_factor == VK_BLEND_FACTOR_ZERO) {
+		*src_factor = VK_BLEND_FACTOR_ZERO;
+		*dst_factor = replacement_src;
+
+		/* Commuting the operands requires reversing subtractions. */
+		if (*func == VK_BLEND_OP_SUBTRACT)
+			*func = VK_BLEND_OP_REVERSE_SUBTRACT;
+		else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
+			*func = VK_BLEND_OP_SUBTRACT;
+	}
+}
+
+static bool si_blend_factor_uses_dst(unsigned factor) /* true for the four DST_* factors and SRC_ALPHA_SATURATE */
+{
+	return factor == VK_BLEND_FACTOR_DST_COLOR ||
+		factor == VK_BLEND_FACTOR_DST_ALPHA ||
+		factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+		factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
+		factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
+}
+
 static bool is_dual_src(VkBlendFactor factor)
 {
 	switch (factor) {
@@ -1067,20 +1166,37 @@ format_is_int8(VkFormat format)
 	       desc->channel[channel].size == 8;
 }

+static bool
+format_is_int10(VkFormat format) /* true if a 4-channel format has a 10-bit pure-integer channel */
+{
+	const struct vk_format_description *desc = vk_format_description(format);
+
+	if (desc->nr_channels != 4) /* only 4-channel formats are considered */
+		return false;
+	for (unsigned i = 0; i < 4; i++) { /* a single matching channel is enough */
+		if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
+			return true;
+	}
+	return false;
+}
+
 unsigned radv_format_meta_fs_key(VkFormat format)
 {
 	unsigned col_format = si_choose_spi_color_format(format, false, false) - 1;
 	bool is_int8 = format_is_int8(format);
+	bool is_int10 = format_is_int10(format);

-	return col_format + (is_int8 ? 3 : 0);
+	return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0);
 }

-static unsigned
-radv_pipeline_compute_is_int8(const VkGraphicsPipelineCreateInfo *pCreateInfo)
+static void
+radv_pipeline_compute_get_int_clamp(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+				    unsigned *is_int8, unsigned *is_int10)
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
-	unsigned is_int8 = 0;
+	*is_int8 = 0;
+	*is_int10 = 0;

 	for (unsigned i = 0; i < subpass->color_count; ++i) {
 		struct radv_render_pass_attachment *attachment;
@@ -1091,10 +1207,10 @@ radv_pipeline_compute_is_int8(const VkGraphicsPipelineCreateInfo *pCreateInfo)
 		attachment = pass->attachments + subpass->color_attachments[i].attachment;

 		if (format_is_int8(attachment->format))
-			is_int8 |= 1 << i;
+			*is_int8 |= 1 << i;
+		if (format_is_int10(attachment->format))
+			*is_int10 |= 1 << i;
 	}
-
-	return is_int8;
 }

 static void
@@ -1132,6 +1248,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 	for (i = 0; i < vkblend->attachmentCount; i++) {
 		const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
 		unsigned blend_cntl = 0;
+		unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
 		VkBlendOp eqRGB = att->colorBlendOp;
 		VkBlendFactor srcRGB = att->srcColorBlendFactor;
 		VkBlendFactor dstRGB = att->dstColorBlendFactor;
@@ -1139,7 +1256,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		VkBlendFactor srcA = att->srcAlphaBlendFactor;
 		VkBlendFactor dstA = att->dstAlphaBlendFactor;

-		blend->sx_mrt0_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+		blend->sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);

 		if (!att->colorWriteMask)
 			continue;
@@ -1163,6 +1280,50 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 			dstA = VK_BLEND_FACTOR_ONE;
 		}

+		/* Blending optimizations for RB+.
+		 * These transformations don't change the behavior.
+		 *
+		 * First, get rid of DST in the blend factors:
+		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+		 */
+		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
+				    VK_BLEND_FACTOR_DST_COLOR,
+				    VK_BLEND_FACTOR_SRC_COLOR);
+
+		si_blend_remove_dst(&eqA, &srcA, &dstA,
+				    VK_BLEND_FACTOR_DST_COLOR,
+				    VK_BLEND_FACTOR_SRC_COLOR);
+
+		si_blend_remove_dst(&eqA, &srcA, &dstA,
+				    VK_BLEND_FACTOR_DST_ALPHA,
+				    VK_BLEND_FACTOR_SRC_ALPHA);
+
+		/* Look up the ideal settings from tables. */
+		srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
+		dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
+		srcA_opt = si_translate_blend_opt_factor(srcA, true);
+		dstA_opt = si_translate_blend_opt_factor(dstA, true);
+
+		/* Handle interdependencies. */
+		if (si_blend_factor_uses_dst(srcRGB))
+			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+		if (si_blend_factor_uses_dst(srcA))
+			dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+
+		if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE &&
+		    (dstRGB == VK_BLEND_FACTOR_ZERO ||
+		     dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
+		     dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE))
+			dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+
+		/* Set the final value. */
+		blend->sx_mrt_blend_opt[i] =
+			S_028760_COLOR_SRC_OPT(srcRGB_opt) |
+			S_028760_COLOR_DST_OPT(dstRGB_opt) |
+			S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+			S_028760_ALPHA_SRC_OPT(srcA_opt) |
+			S_028760_ALPHA_DST_OPT(dstA_opt) |
+			S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
 		blend_cntl |= S_028780_ENABLE(1);

 		blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
@@ -1186,8 +1347,14 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		    dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
 			blend_need_alpha |= 1 << i;
 	}
-	for (i = vkblend->attachmentCount; i < 8; i++)
+	for (i = vkblend->attachmentCount; i < 8; i++) {
 		blend->cb_blend_control[i] = 0;
+		blend->sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+	}
+
+	/* disable RB+ for now */
+	if (pipeline->device->physical_device->has_rbplus)
+		blend->cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);

 	if (blend->cb_target_mask)
 		blend->cb_color_control |= S_028808_MODE(mode);
@@ -2053,9 +2220,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	}

 	if (modules[MESA_SHADER_FRAGMENT]) {
-		union ac_shader_variant_key key;
+		union ac_shader_variant_key key = {0};
 		key.fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
-		key.fs.is_int8 = radv_pipeline_compute_is_int8(pCreateInfo);
+
+		if (pipeline->device->physical_device->rad_info.chip_class < VI)
+			radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.fs.is_int8, &key.fs.is_int10);

 		const VkPipelineShaderStageCreateInfo *stage = pStages[MESA_SHADER_FRAGMENT];

@@ -2180,6 +2349,9 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
 		S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory);

+	if (pipeline->device->physical_device->has_rbplus)
+		pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
+
 	pipeline->graphics.shader_z_format =
 		ps->info.fs.writes_sample_mask ? V_028710_SPI_SHADER_32_ABGR :
 		ps->info.fs.writes_stencil ? V_028710_SPI_SHADER_32_GR :
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -118,6 +118,9 @@ radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
 	const uint32_t mask = cache->table_size - 1;
 	const uint32_t start = (*(uint32_t *) sha1);

+	if (cache->table_size == 0)
+		return NULL;
+
 	for (uint32_t i = 0; i < cache->table_size; i++) {
 		const uint32_t index = (start + i) & mask;
 		struct cache_entry *entry = cache->hash_table[index];
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -84,7 +84,7 @@ typedef uint32_t xcb_window_t;
 #define MAX_PUSH_DESCRIPTORS 32
 #define MAX_DYNAMIC_BUFFERS 16
 #define MAX_SAMPLES_LOG2 4
-#define NUM_META_FS_KEYS 11
+#define NUM_META_FS_KEYS 13
 #define RADV_MAX_DRM_DEVICES 8

 #define NUM_DEPTH_CLEAR_PIPELINES 3
@@ -266,7 +266,7 @@ struct radv_physical_device {
 	struct radeon_winsys *ws;
 	struct radeon_info rad_info;
 	char                                        path[20];
-	const char *                                name;
+	char                                        name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
 	uint8_t                                     uuid[VK_UUID_SIZE];
 	uint8_t                                     device_uuid[VK_UUID_SIZE];

@@ -276,6 +276,9 @@ struct radv_physical_device {

 	bool has_rbplus; /* if RB+ register exist */
 	bool rbplus_allowed; /* if RB+ is allowed */
+
+	VkPhysicalDeviceMemoryProperties memory_properties;
+	enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
 };

 struct radv_instance {
@@ -433,8 +436,6 @@ struct radv_meta_state {
 		VkPipelineLayout                          p_layout;

 		struct {
-			VkRenderPass srgb_render_pass;
-			VkPipeline   srgb_pipeline;
 			VkRenderPass render_pass[NUM_META_FS_KEYS];
 			VkPipeline   pipeline[NUM_META_FS_KEYS];
 		} rc[MAX_SAMPLES_LOG2];
@@ -444,7 +445,7 @@ struct radv_meta_state {
 		VkPipeline                                decompress_pipeline;
 		VkPipeline                                resummarize_pipeline;
 		VkRenderPass                              pass;
-	} depth_decomp;
+	} depth_decomp[1 + MAX_SAMPLES_LOG2];

 	struct {
 		VkPipeline                                cmask_eliminate_pipeline;
@@ -1002,7 +1003,7 @@ struct radv_depth_stencil_state {
 struct radv_blend_state {
 	uint32_t cb_color_control;
 	uint32_t cb_target_mask;
-	uint32_t sx_mrt0_blend_opt[8];
+	uint32_t sx_mrt_blend_opt[8];
 	uint32_t cb_blend_control[8];

 	uint32_t spi_shader_col_format;
@@ -1215,14 +1216,14 @@ struct radv_image {
 	/* Set when bound */
 	struct radeon_winsys_bo *bo;
 	VkDeviceSize offset;
-	uint32_t dcc_offset;
-	uint32_t htile_offset;
+	uint64_t dcc_offset;
+	uint64_t htile_offset;
 	struct radeon_surf surface;

 	struct radv_fmask_info fmask;
 	struct radv_cmask_info cmask;
-	uint32_t clear_value_offset;
-	uint32_t dcc_pred_offset;
+	uint64_t clear_value_offset;
+	uint64_t dcc_pred_offset;
 };

 /* Whether the image has a htile that is known consistent with the contents of
@@ -1280,6 +1281,7 @@ struct radv_image_view {
 	uint32_t base_layer;
 	uint32_t layer_count;
 	uint32_t base_mip;
+	uint32_t level_count;
 	VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

 	uint32_t descriptor[8];
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -653,7 +653,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_compute_state saved_state;

-	radv_meta_save_compute(&saved_state, cmd_buffer, 4);
+	radv_meta_save_compute(&saved_state, cmd_buffer, 16);

 	struct radv_buffer dst_buffer = {
 		.bo = dst_bo,
@@ -737,7 +737,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 	                                RADV_CMD_FLAG_INV_VMEM_L1 |
 	                                RADV_CMD_FLAG_CS_PARTIAL_FLUSH;

-	radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
+	radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
 }

 VkResult radv_CreateQueryPool(
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -51,7 +51,8 @@ enum radeon_bo_flag { /* bitfield */
 	RADEON_FLAG_GTT_WC =        (1 << 0),
 	RADEON_FLAG_CPU_ACCESS =    (1 << 1),
 	RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
-	RADEON_FLAG_VIRTUAL =       (1 << 3)
+	RADEON_FLAG_VIRTUAL =       (1 << 3),
+	RADEON_FLAG_VA_UNCACHED =   (1 << 4),
 };

 enum radeon_bo_usage { /* bitfield */
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -154,6 +154,7 @@ radv_wsi_image_create(VkDevice device_h,
 	VkImage image_h;
 	struct radv_image *image;
 	int fd;
+	RADV_FROM_HANDLE(radv_device, device, device_h);

 	result = radv_image_create(device_h,
 				   &(struct radv_image_create_info) {
@@ -192,12 +193,26 @@ radv_wsi_image_create(VkDevice device_h,
 		.image = image_h
 	};

+	/* Find the first VRAM memory type, or GART for PRIME images. */
+	int memory_type_index = -1;
+	for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+		bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+		if ((linear && !is_local) || (!linear && is_local)) {
+			memory_type_index = i;
+			break;
+		}
+	}
+
+	/* fallback */
+	if (memory_type_index == -1)
+		memory_type_index = 0;
+
 	result = radv_AllocateMemory(device_h,
 				     &(VkMemoryAllocateInfo) {
 					     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 					     .pNext = &ded_alloc,
 					     .allocationSize = image->size,
-					     .memoryTypeIndex = linear ? 1 : 0,
+					     .memoryTypeIndex = memory_type_index,
 				     },
 				     NULL /* XXX: pAllocator */,
 				     &memory_h);
@@ -211,7 +226,6 @@ radv_wsi_image_create(VkDevice device_h,
 	 * or the fd for the linear image if a copy is required.
 	 */
 	if (!needs_linear_copy || (needs_linear_copy && linear)) {
-		RADV_FROM_HANDLE(radv_device, device, device_h);
 		RADV_FROM_HANDLE(radv_device_memory, memory, memory_h);
 		if (!radv_get_memory_fd(device, memory, &fd))
 			goto fail_alloc_memory;
@@ -224,7 +238,11 @@ radv_wsi_image_create(VkDevice device_h,
 	*memory_p = memory_h;
 	*size = image->size;
 	*offset = image->offset;
-	*row_pitch = surface->u.legacy.level[0].nblk_x * surface->bpe;
+
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		*row_pitch = surface->u.gfx9.surf_pitch * surface->bpe;
+	else
+		*row_pitch = surface->u.legacy.level[0].nblk_x * surface->bpe;
 	return VK_SUCCESS;
 fail_alloc_memory:
 	radv_FreeMemory(device_h, memory_h, pAllocator);
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -1133,15 +1133,20 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 void
 si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
 {
-	uint32_t val = 0;
+	uint32_t op = 0;

 	if (va)
-		val = (((va >> 32) & 0xff) |
-		       PRED_OP(PREDICATION_OP_BOOL64)|
-		       PREDICATION_DRAW_VISIBLE);
-	radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
-	radeon_emit(cmd_buffer->cs, va);
-	radeon_emit(cmd_buffer->cs, val);
+		op = PRED_OP(PREDICATION_OP_BOOL64) | PREDICATION_DRAW_VISIBLE;
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
+		radeon_emit(cmd_buffer->cs, op);
+		radeon_emit(cmd_buffer->cs, va);
+		radeon_emit(cmd_buffer->cs, va >> 32);
+	} else {
+		radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+		radeon_emit(cmd_buffer->cs, va);
+		radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF));
+	}
 }

 /* Set this if you want the 3D engine to wait until CP DMA is done.
--- a/src/amd/vulkan/vk_format.h
+++ b/src/amd/vulkan/vk_format.h
@@ -465,4 +465,27 @@ vk_format_get_component_bits(VkFormat format,
 	}
 }

+static inline VkFormat
+vk_to_non_srgb_format(VkFormat format) /* map the listed *_SRGB formats to the same-named *_UNORM format */
+{
+	switch(format) {
+	case VK_FORMAT_R8_SRGB :
+		return VK_FORMAT_R8_UNORM;
+	case VK_FORMAT_R8G8_SRGB:
+		return VK_FORMAT_R8G8_UNORM;
+	case VK_FORMAT_R8G8B8_SRGB:
+		return VK_FORMAT_R8G8B8_UNORM;
+	case VK_FORMAT_B8G8R8_SRGB:
+		return VK_FORMAT_B8G8R8_UNORM;
+	case VK_FORMAT_R8G8B8A8_SRGB :
+		return VK_FORMAT_R8G8B8A8_UNORM;
+	case VK_FORMAT_B8G8R8A8_SRGB:
+		return VK_FORMAT_B8G8R8A8_UNORM;
+	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+		return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+	default: /* any format not listed above is returned unchanged */
+		return format;
+	}
+}
+
 #endif /* VK_FORMAT_H */
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -39,6 +39,23 @@

 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

+static int
+radv_amdgpu_bo_va_op(amdgpu_device_handle dev, /* wrapper around amdgpu_bo_va_op_raw(); returns its result */
+		     amdgpu_bo_handle bo,
+		     uint64_t offset,
+		     uint64_t size,
+		     uint64_t addr,
+		     uint64_t flags,
+		     uint32_t ops)
+{
+	size = ALIGN(size, getpagesize()); /* align the requested size to the system page size */
+	flags |= (AMDGPU_VM_PAGE_READABLE | /* these three page flags are always added to the caller's flags */
+		  AMDGPU_VM_PAGE_WRITEABLE |
+		  AMDGPU_VM_PAGE_EXECUTABLE);
+	return amdgpu_bo_va_op_raw(dev, bo, offset, size, addr,
+				   flags, ops);
+}
+
 static void
 radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
@@ -49,8 +66,8 @@ radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
 		return; /* TODO: PRT mapping */

 	p_atomic_inc(&range->bo->ref_count);
-	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
-	                        range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
+	int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
+				     range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
 	if (r)
 		abort();
 }
@@ -64,8 +81,8 @@ radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
 	if (!range->bo)
 		return; /* TODO: PRT mapping */

-	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
-	                        range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
+	int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
+				     range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
 	if (r)
 		abort();
 	radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
@@ -149,6 +166,7 @@ radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
 	if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
 		size += offset - parent->ranges[first].offset;
 		offset = parent->ranges[first].offset;
+		bo_offset = parent->ranges[first].bo_offset;
 		remove_first = true;
 	}

@@ -234,7 +252,7 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 			bo->ws->num_buffers--;
 			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
 		}
-		amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
+		radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
 		amdgpu_bo_free(bo->bo);
 	}
 	amdgpu_va_range_free(bo->va_handle);
@@ -322,7 +340,11 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		goto error_bo_alloc;
 	}

-	r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
+
+	uint32_t va_flags = 0;
+	if ((flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
+		va_flags |= AMDGPU_VM_MTYPE_UC;
+	r = radv_amdgpu_bo_va_op(ws->dev, buf_handle, 0, size, va, va_flags, AMDGPU_VA_OP_MAP);
 	if (r)
 		goto error_va_map;

@@ -398,7 +420,7 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
 	if (r)
 		goto error_query;

-	r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
+	r = radv_amdgpu_bo_va_op(ws->dev, result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
 	if (r)
 		goto error_va_map;

--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -86,6 +86,7 @@ LIBGLSL_FILES = \
 	glsl/lower_buffer_access.cpp \
 	glsl/lower_buffer_access.h \
 	glsl/lower_const_arrays_to_uniforms.cpp \
+	glsl/lower_cs_derived.cpp \
 	glsl/lower_discard.cpp \
 	glsl/lower_discard_flow.cpp \
 	glsl/lower_distance.cpp \
@@ -140,7 +141,9 @@ LIBGLSL_FILES = \
 	glsl/program.h \
 	glsl/propagate_invariance.cpp \
 	glsl/s_expression.cpp \
-	glsl/s_expression.h
+	glsl/s_expression.h \
+	glsl/string_to_uint_map.cpp \
+	glsl/string_to_uint_map.h

 LIBGLSL_SHADER_CACHE_FILES = \
 	glsl/shader_cache.cpp \
--- a/src/compiler/glsl/ast_function.cpp
+++ b/src/compiler/glsl/ast_function.cpp
@@ -224,19 +224,28 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
            val = ((ir_swizzle *)val)->val;
         }

-         while (val->ir_type == ir_type_dereference_array) {
-            val = ((ir_dereference_array *)val)->array;
+         for (;;) {
+            if (val->ir_type == ir_type_dereference_array) {
+               val = ((ir_dereference_array *)val)->array;
+            } else if (val->ir_type == ir_type_dereference_record &&
+                       !state->es_shader) {
+               val = ((ir_dereference_record *)val)->record;
+            } else
+               break;
         }

-         if (!val->as_dereference_variable() ||
-             val->variable_referenced()->data.mode != ir_var_shader_in) {
+         ir_variable *var = NULL;
+         if (const ir_dereference_variable *deref_var = val->as_dereference_variable())
+            var = deref_var->variable_referenced();
+
+         if (!var || var->data.mode != ir_var_shader_in) {
            _mesa_glsl_error(&loc, state,
                             "parameter `%s` must be a shader input",
                             formal->name);
            return false;
         }

-         val->variable_referenced()->data.must_be_shader_input = 1;
+         var->data.must_be_shader_input = 1;
      }

      /* Verify that 'out' and 'inout' actual parameters are lvalues. */
@@ -663,8 +672,13 @@ generate_array_index(void *mem_ctx, exec_list *instructions,
      ir_variable *sub_var = NULL;
      *function_name = array->primary_expression.identifier;

-      match_subroutine_by_name(*function_name, actual_parameters,
-                               state, &sub_var);
+      if (!match_subroutine_by_name(*function_name, actual_parameters,
+                                    state, &sub_var)) {
+         _mesa_glsl_error(&loc, state, "Unknown subroutine `%s'",
+                          *function_name);
+         *function_name = NULL; /* indicate error condition to caller */
+         return NULL;
+      }

      ir_rvalue *outer_array_idx = idx->hir(instructions, state);
      return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4495,7 +4495,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,
      } else {
         if (var->type->is_numeric()) {
            /* Reduce cascading errors. */
-            var->constant_value = type->qualifier.flags.q.constant
+            rhs = var->constant_value = type->qualifier.flags.q.constant
               ? ir_constant::zero(state, var->type) : NULL;
         }
      }
--- a/src/compiler/glsl/blob.c
+++ b/src/compiler/glsl/blob.c
@@ -46,6 +46,9 @@ grow_to_fit(struct blob *blob, size_t additional)
   size_t to_allocate;
   uint8_t *new_data;

+   if (blob->out_of_memory)
+      return false;
+
   if (blob->size + additional <= blob->allocated)
      return true;

@@ -57,8 +60,10 @@ grow_to_fit(struct blob *blob, size_t additional)
   to_allocate = MAX2(to_allocate, blob->allocated + additional);

   new_data = realloc(blob->data, to_allocate);
-   if (new_data == NULL)
+   if (new_data == NULL) {
+      blob->out_of_memory = true;
      return false;
+   }

   blob->data = new_data;
   blob->allocated = to_allocate;
@@ -104,6 +109,7 @@ blob_create()
   blob->data = NULL;
   blob->allocated = 0;
   blob->size = 0;
+   blob->out_of_memory = false;

   return blob;
 }
@@ -207,6 +213,9 @@ blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size)
 static bool
 ensure_can_read(struct blob_reader *blob, size_t size)
 {
+   if (blob->overrun)
+      return false;
+
   if (blob->current < blob->end && blob->end - blob->current >= size)
      return true;

--- a/src/compiler/glsl/blob.h
+++ b/src/compiler/glsl/blob.h
@@ -55,6 +55,12 @@ struct blob {

   /** The number of bytes that have actual data written to them. */
   size_t size;
+
+   /**
+    * True if we've ever failed to realloc or if we go past the end of a fixed
+    * allocation blob.
+    */
+   bool out_of_memory;
 };

 /* When done reading, the caller can ensure that everything was consumed by
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -90,9 +90,9 @@ static const struct gl_builtin_uniform_element gl_LightSource_elements[] = {
 		  SWIZZLE_Y,
 		  SWIZZLE_Z,
 		  SWIZZLE_Z)},
-   {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW},
-   {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX},
   {"spotExponent", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW},
+   {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX},
+   {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW},
   {"constantAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX},
   {"linearAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY},
   {"quadraticAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ},
@@ -1290,15 +1290,10 @@ builtin_variable_generator::generate_cs_special_vars()
                       uvec3_t, "gl_LocalGroupSizeARB");
   }

-   if (state->ctx->Const.LowerCsDerivedVariables) {
-      add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
-      add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
-   } else {
-      add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
-                       uvec3_t, "gl_GlobalInvocationID");
-      add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
-                       uint_t, "gl_LocalInvocationIndex");
-   }
+   add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+                    uvec3_t, "gl_GlobalInvocationID");
+   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+                    uint_t, "gl_LocalInvocationIndex");
 }


@@ -1469,84 +1464,3 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
      break;
   }
 }
-
-
-/**
- * Initialize compute shader variables with values that are derived from other
- * compute shader variable.
- */
-static void
-initialize_cs_derived_variables(gl_shader *shader,
-                                ir_function_signature *const main_sig)
-{
-   assert(shader->Stage == MESA_SHADER_COMPUTE);
-
-   ir_variable *gl_GlobalInvocationID =
-      shader->symbols->get_variable("gl_GlobalInvocationID");
-   assert(gl_GlobalInvocationID);
-   ir_variable *gl_WorkGroupID =
-      shader->symbols->get_variable("gl_WorkGroupID");
-   assert(gl_WorkGroupID);
-   ir_variable *gl_WorkGroupSize =
-      shader->symbols->get_variable("gl_WorkGroupSize");
-   if (gl_WorkGroupSize == NULL) {
-      void *const mem_ctx = ralloc_parent(shader->ir);
-      gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
-                                                  "gl_WorkGroupSize",
-                                                  ir_var_auto);
-      gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
-      gl_WorkGroupSize->data.read_only = true;
-      shader->ir->push_head(gl_WorkGroupSize);
-   }
-   ir_variable *gl_LocalInvocationID =
-      shader->symbols->get_variable("gl_LocalInvocationID");
-   assert(gl_LocalInvocationID);
-
-   /* gl_GlobalInvocationID =
-    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-    */
-   ir_instruction *inst =
-      assign(gl_GlobalInvocationID,
-             add(mul(gl_WorkGroupID, gl_WorkGroupSize),
-                 gl_LocalInvocationID));
-   main_sig->body.push_head(inst);
-
-   /* gl_LocalInvocationIndex =
-    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-    *    gl_LocalInvocationID.x;
-    */
-   ir_expression *index_z =
-      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
-          swizzle_y(gl_WorkGroupSize));
-   ir_expression *index_y =
-      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
-   ir_expression *index_y_plus_z = add(index_y, index_z);
-   operand index_x(swizzle_x(gl_LocalInvocationID));
-   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
-   ir_variable *gl_LocalInvocationIndex =
-      shader->symbols->get_variable("gl_LocalInvocationIndex");
-   assert(gl_LocalInvocationIndex);
-   inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
-   main_sig->body.push_head(inst);
-}
-
-
-/**
- * Initialize builtin variables with values based on other builtin variables.
- * These are initialized in the main function.
- */
-void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader)
-{
-   /* We only need to set CS variables currently. */
-   if (shader->Stage == MESA_SHADER_COMPUTE &&
-       ctx->Const.LowerCsDerivedVariables) {
-      ir_function_signature *const main_sig =
-         _mesa_get_main_function_signature(shader->symbols);
-
-      if (main_sig != NULL)
-         initialize_cs_derived_variables(shader, main_sig);
-   }
-}
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -1863,6 +1863,49 @@ set_shader_inout_layout(struct gl_shader *shader,
   shader->bound_image = state->bound_image_specified;
 }

+/* Add shader_ir's functions and non-temporary variables to dest. src may be
+ * NULL; when it is not, its gl_PerVertex in/out interfaces are copied too.
+ */
+void
+_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
+                                   struct glsl_symbol_table *src,
+                                   struct glsl_symbol_table *dest)
+{
+   foreach_in_list (ir_instruction, ir, shader_ir) { /* only entries directly in this list are visited */
+      switch (ir->ir_type) {
+      case ir_type_function:
+         dest->add_function((ir_function *) ir);
+         break;
+      case ir_type_variable: {
+         ir_variable *const var = (ir_variable *) ir;
+
+         if (var->data.mode != ir_var_temporary) /* temporaries are not added to dest */
+            dest->add_variable(var);
+         break;
+      }
+      default: /* every other instruction type is ignored */
+         break;
+      }
+   }
+
+   if (src != NULL) {
+      /* Explicitly copy the gl_PerVertex interface definitions because these
+       * are needed to check they are the same during the interstage link.
+       * They can't necessarily be found via the exec_list because the members
+       * might not be referenced. The GL spec still requires that they match
+       * in that case.
+       */
+      const glsl_type *iface =
+         src->get_interface("gl_PerVertex", ir_var_shader_in);
+      if (iface)
+         dest->add_interface(iface->name, iface, ir_var_shader_in);
+
+      iface = src->get_interface("gl_PerVertex", ir_var_shader_out);
+      if (iface)
+         dest->add_interface(iface->name, iface, ir_var_shader_out);
+   }
+}
+
 extern "C" {

 static void
@@ -1937,6 +1980,7 @@ do_late_parsing_checks(struct _mesa_glsl_parse_state *state)

 static void
 opt_shader_and_create_symbol_table(struct gl_context *ctx,
+                                   struct glsl_symbol_table *source_symbols,
                                   struct gl_shader *shader)
 {
   assert(shader->CompileStatus != compile_failure &&
@@ -1994,24 +2038,8 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
    * We don't have to worry about types or interface-types here because those
    * are fly-weights that are looked up by glsl_type.
    */
-   foreach_in_list (ir_instruction, ir, shader->ir) {
-      switch (ir->ir_type) {
-      case ir_type_function:
-         shader->symbols->add_function((ir_function *) ir);
-         break;
-      case ir_type_variable: {
-         ir_variable *const var = (ir_variable *) ir;
-
-         if (var->data.mode != ir_var_temporary)
-            shader->symbols->add_variable(var);
-         break;
-      }
-      default:
-         break;
-      }
-   }
-
-   _mesa_glsl_initialize_derived_variables(ctx, shader);
+   _mesa_glsl_copy_symbols_from_table(shader->ir, source_symbols,
+                                      shader->symbols);
 }

 void
@@ -2048,7 +2076,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
         return;

      if (shader->CompileStatus == compiled_no_opts) {
-         opt_shader_and_create_symbol_table(ctx, shader);
+         opt_shader_and_create_symbol_table(ctx,
+                                            NULL, /* source_symbols */
+                                            shader);
         shader->CompileStatus = compile_success;
         return;
      }
@@ -2109,7 +2139,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
      lower_subroutine(shader->ir, state);

      if (!ctx->Cache || force_recompile)
-         opt_shader_and_create_symbol_table(ctx, shader);
+         opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
      else {
         reparent_ir(shader->ir, shader->ir);
         shader->CompileStatus = compiled_no_opts;
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -948,6 +948,11 @@ extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
 extern void _mesa_destroy_shader_compiler(void);
 extern void _mesa_destroy_shader_compiler_caches(void);

+extern void
+_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
+                                   struct glsl_symbol_table *src,
+                                   struct glsl_symbol_table *dest);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1158,8 +1158,6 @@ nir_visitor::visit(ir_call *ir)
      case nir_intrinsic_vote_eq: {
         nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);

-         instr->variables[0] = evaluate_deref(&instr->instr, ir->return_deref);
-
         ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();
         instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));

--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -2402,10 +2402,6 @@ extern void
 _mesa_glsl_initialize_variables(exec_list *instructions,
 				struct _mesa_glsl_parse_state *state);

-extern void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader);
-
 extern void
 reparent_ir(exec_list *list, void *mem_ctx);

--- a/src/compiler/glsl/ir_constant_expression.cpp
+++ b/src/compiler/glsl/ir_constant_expression.cpp
@@ -725,6 +725,8 @@ ir_swizzle::constant_expression_value(struct hash_table *variable_context)
         case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
         case GLSL_TYPE_BOOL:  data.b[i] = v->value.b[swiz_idx[i]]; break;
         case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break;
+         case GLSL_TYPE_UINT64:data.u64[i] = v->value.u64[swiz_idx[i]]; break;
+         case GLSL_TYPE_INT64: data.i64[i] = v->value.i64[swiz_idx[i]]; break;
         default:              assert(!"Should not get here."); break;
         }
      }
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -165,6 +165,7 @@ void optimize_dead_builtin_variables(exec_list *instructions,
 bool lower_tess_level(gl_linked_shader *shader);

 bool lower_vertex_id(gl_linked_shader *shader);
+bool lower_cs_derived(gl_linked_shader *shader);
 bool lower_blend_equation_advanced(gl_linked_shader *shader);

 bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
--- a/src/compiler/glsl/link_atomics.cpp
+++ b/src/compiler/glsl/link_atomics.cpp
@@ -207,7 +207,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
   active_atomic_buffer *abs =
      find_active_atomic_counters(ctx, prog, &num_buffers);

-   prog->data->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer,
+   prog->data->AtomicBuffers = rzalloc_array(prog->data, gl_active_atomic_buffer,
                                             num_buffers);
   prog->data->NumAtomicBuffers = num_buffers;

@@ -270,7 +270,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
         struct gl_program *gl_prog = prog->_LinkedShaders[j]->Program;
         gl_prog->info.num_abos = num_atomic_buffers[j];
         gl_prog->sh.AtomicBuffers =
-            rzalloc_array(prog, gl_active_atomic_buffer *,
+            rzalloc_array(gl_prog, gl_active_atomic_buffer *,
                          num_atomic_buffers[j]);

         unsigned intra_stage_idx = 0;
--- a/src/compiler/glsl/link_interface_blocks.cpp
+++ b/src/compiler/glsl/link_interface_blocks.cpp
@@ -364,6 +364,35 @@ validate_interstage_inout_blocks(struct gl_shader_program *prog,
                                   consumer->Stage != MESA_SHADER_FRAGMENT) ||
                                  consumer->Stage == MESA_SHADER_GEOMETRY;

+   /* Check that block re-declarations of gl_PerVertex are compatible
+    * across shaders: From OpenGL Shading Language 4.5, section
+    * "7.1 Built-In Language Variables", page 130 of the PDF:
+    *
+    *    "If multiple shaders using members of a built-in block belonging
+    *     to the same interface are linked together in the same program,
+    *     they must all redeclare the built-in block in the same way, as
+    *     described in section 4.3.9 “Interface Blocks” for interface-block
+    *     matching, or a link-time error will result."
+    *
+    * This is done explicitly outside of iterating the member variable
+    * declarations because it is possible that the variables are not used and
+    * so they would have been optimised out.
+    */
+   const glsl_type *consumer_iface =
+      consumer->symbols->get_interface("gl_PerVertex",
+                                       ir_var_shader_in);
+
+   const glsl_type *producer_iface =
+      producer->symbols->get_interface("gl_PerVertex",
+                                       ir_var_shader_out);
+
+   if (producer_iface && consumer_iface &&
+       interstage_member_mismatch(prog, consumer_iface, producer_iface)) {
+      linker_error(prog, "Incompatible or missing gl_PerVertex re-declaration "
+                   "in consecutive shaders");
+      return;
+   }
+
   /* Add output interfaces from the producer to the symbol table. */
   foreach_in_list(ir_instruction, node, producer->ir) {
      ir_variable *var = node->as_variable();
--- a/src/compiler/glsl/link_uniform_initializers.cpp
+++ b/src/compiler/glsl/link_uniform_initializers.cpp
@@ -25,7 +25,7 @@
 #include "ir.h"
 #include "linker.h"
 #include "ir_uniform.h"
-#include "util/string_to_uint_map.h"
+#include "string_to_uint_map.h"

 /* These functions are put in a "private" namespace instead of being marked
 * static so that the unit tests can access them.  See
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -27,7 +27,7 @@
 #include "ir_uniform.h"
 #include "glsl_symbol_table.h"
 #include "program.h"
-#include "util/string_to_uint_map.h"
+#include "string_to_uint_map.h"
 #include "ir_array_refcount.h"

 /**
@@ -1319,7 +1319,7 @@ link_assign_uniform_storage(struct gl_context *ctx,

   union gl_constant_value *data;
   if (prog->data->UniformStorage == NULL) {
-      prog->data->UniformStorage = rzalloc_array(prog,
+      prog->data->UniformStorage = rzalloc_array(prog->data,
                                                 struct gl_uniform_storage,
                                                 prog->data->NumUniformStorage);
      data = rzalloc_array(prog->data->UniformStorage,
@@ -1385,13 +1385,6 @@ link_assign_uniform_storage(struct gl_context *ctx,
             sizeof(shader->Program->sh.SamplerTargets));
   }

-   /* If this is a fallback compile for a cache miss we already have the
-    * correct uniform mappings and we don't want to reinitialise uniforms so
-    * just return now.
-    */
-   if (prog->data->cache_fallback)
-      return;
-
 #ifndef NDEBUG
   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
      assert(prog->data->UniformStorage[i].storage != NULL ||
@@ -1416,11 +1409,9 @@ void
 link_assign_uniform_locations(struct gl_shader_program *prog,
                              struct gl_context *ctx)
 {
-   if (!prog->data->cache_fallback) {
-      ralloc_free(prog->data->UniformStorage);
-      prog->data->UniformStorage = NULL;
-      prog->data->NumUniformStorage = 0;
-   }
+   ralloc_free(prog->data->UniformStorage);
+   prog->data->UniformStorage = NULL;
+   prog->data->NumUniformStorage = 0;

   if (prog->UniformHash != NULL) {
      prog->UniformHash->clear();
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -165,10 +165,12 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,

         if (var->data.from_named_ifc_block) {
            type = var->get_interface_type();
+
            /* Find the member type before it was altered by lowering */
+            const glsl_type *type_wa = type->without_array();
            member_type =
-               type->fields.structure[type->field_index(var->name)].type;
-            name = ralloc_strdup(NULL, type->without_array()->name);
+               type_wa->fields.structure[type_wa->field_index(var->name)].type;
+            name = ralloc_strdup(NULL, type_wa->name);
         } else {
            type = var->type;
            member_type = NULL;
@@ -1119,7 +1121,6 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
-               buffers |= 1 << j;
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
@@ -1144,10 +1145,24 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
            num_buffers++;
            buffer_stream_id = -1;
            continue;
-         } else if (tfeedback_decls[i].is_varying()) {
+         }
+
+         if (has_xfb_qualifiers) {
+            buffer = tfeedback_decls[i].get_buffer();
+         } else {
+            buffer = num_buffers;
+         }
+
+         if (tfeedback_decls[i].is_varying()) {
            if (buffer_stream_id == -1)  {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
+
+               /* Only mark a buffer as active when there is a varying
+                * attached to it. This behaviour is based on a revised version
+                * of section 13.2.2 of the GL 4.6 spec.
+                */
+               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) tfeedback_decls[i].get_stream_id()) {
               /* Varying writes to the same buffer from a different stream */
@@ -1163,13 +1178,6 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
            }
         }

-         if (has_xfb_qualifiers) {
-            buffer = tfeedback_decls[i].get_buffer();
-         } else {
-            buffer = num_buffers;
-         }
-         buffers |= 1 << buffer;
-
         if (!tfeedback_decls[i].store(ctx, prog,
                                       xfb_prog->sh.LinkedTransformFeedback,
                                       buffer, num_buffers, num_outputs,
@@ -2072,7 +2080,8 @@ reserved_varying_slot(struct gl_linked_shader *stage,
      var_slot = var->data.location - VARYING_SLOT_VAR0;

      unsigned num_elements = get_varying_type(var, stage->Stage)
-         ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX);
+         ->count_attribute_slots(io_mode == ir_var_shader_in &&
+                                 stage->Stage == MESA_SHADER_VERTEX);
      for (unsigned i = 0; i < num_elements; i++) {
         if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
            slots |= UINT64_C(1) << var_slot;
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -75,7 +75,7 @@
 #include "program/program.h"
 #include "util/mesa-sha1.h"
 #include "util/set.h"
-#include "util/string_to_uint_map.h"
+#include "string_to_uint_map.h"
 #include "linker.h"
 #include "link_varyings.h"
 #include "ir_optimization.h"
@@ -1128,10 +1128,16 @@ cross_validate_globals(struct gl_shader_program *prog,
         if (prog->IsES && (prog->data->Version != 310 ||
                            !var->get_interface_type()) &&
             existing->data.precision != var->data.precision) {
-            linker_error(prog, "declarations for %s `%s` have "
-                         "mismatching precision qualifiers\n",
-                         mode_string(var), var->name);
-            return;
+            if ((existing->data.used && var->data.used) || prog->data->Version >= 300) {
+               linker_error(prog, "declarations for %s `%s` have "
+                            "mismatching precision qualifiers\n",
+                            mode_string(var), var->name);
+               return;
+            } else {
+               linker_warning(prog, "declarations for %s `%s` have "
+                              "mismatching precision qualifiers\n",
+                              mode_string(var), var->name);
+            }
         }
      } else
         variables->add_variable(var);
@@ -1202,8 +1208,8 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
      }

      for (unsigned int j = 0; j < sh_num_blocks; j++) {
-         int index = link_cross_validate_uniform_block(prog, &blks, num_blks,
-                                                       sh_blks[j]);
+         int index = link_cross_validate_uniform_block(prog->data, &blks,
+                                                       num_blks, sh_blks[j]);

         if (index == -1) {
            linker_error(prog, "buffer block `%s' has mismatching "
@@ -1262,21 +1268,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
 * Populates a shaders symbol table with all global declarations
 */
 static void
-populate_symbol_table(gl_linked_shader *sh)
+populate_symbol_table(gl_linked_shader *sh, glsl_symbol_table *symbols)
 {
   sh->symbols = new(sh) glsl_symbol_table;

-   foreach_in_list(ir_instruction, inst, sh->ir) {
-      ir_variable *var;
-      ir_function *func;
-
-      if ((func = inst->as_function()) != NULL) {
-         sh->symbols->add_function(func);
-      } else if ((var = inst->as_variable()) != NULL) {
-         if (var->data.mode != ir_var_temporary)
-            sh->symbols->add_variable(var);
-      }
-   }
+   _mesa_glsl_copy_symbols_from_table(sh->ir, symbols, sh->symbols);
 }


@@ -2277,8 +2273,7 @@ link_intrastage_shaders(void *mem_ctx,
      return NULL;
   }

-   if (!prog->data->cache_fallback)
-      _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);
+   _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);

   /* Don't use _mesa_reference_program() just take ownership */
   linked->Program = gl_prog;
@@ -2298,7 +2293,7 @@ link_intrastage_shaders(void *mem_ctx,

   link_bindless_layout_qualifiers(prog, gl_prog, shader_list, num_shaders);

-   populate_symbol_table(linked);
+   populate_symbol_table(linked, shader_list[0]->symbols);

   /* The pointer to the main function in the final linked shader (i.e., the
    * copy of the original shader that contained the main function).
@@ -2336,35 +2331,33 @@ link_intrastage_shaders(void *mem_ctx,
   v.run(linked->ir);
   v.fixup_unnamed_interface_types();

-   if (!prog->data->cache_fallback) {
-      /* Link up uniform blocks defined within this stage. */
-      link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
-                          &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);
+   /* Link up uniform blocks defined within this stage. */
+   link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
+                       &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);

-      if (!prog->data->LinkStatus) {
-         _mesa_delete_linked_shader(ctx, linked);
-         return NULL;
-      }
-
-      /* Copy ubo blocks to linked shader list */
-      linked->Program->sh.UniformBlocks =
-         ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
-      ralloc_steal(linked, ubo_blocks);
-      for (unsigned i = 0; i < num_ubo_blocks; i++) {
-         linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
-      }
-      linked->Program->info.num_ubos = num_ubo_blocks;
-
-      /* Copy ssbo blocks to linked shader list */
-      linked->Program->sh.ShaderStorageBlocks =
-         ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
-      ralloc_steal(linked, ssbo_blocks);
-      for (unsigned i = 0; i < num_ssbo_blocks; i++) {
-         linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
-      }
-      linked->Program->info.num_ssbos = num_ssbo_blocks;
+   if (!prog->data->LinkStatus) {
+      _mesa_delete_linked_shader(ctx, linked);
+      return NULL;
   }

+   /* Copy ubo blocks to linked shader list */
+   linked->Program->sh.UniformBlocks =
+      ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
+   ralloc_steal(linked, ubo_blocks);
+   for (unsigned i = 0; i < num_ubo_blocks; i++) {
+      linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
+   }
+   linked->Program->info.num_ubos = num_ubo_blocks;
+
+   /* Copy ssbo blocks to linked shader list */
+   linked->Program->sh.ShaderStorageBlocks =
+      ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
+   ralloc_steal(linked, ssbo_blocks);
+   for (unsigned i = 0; i < num_ssbo_blocks; i++) {
+      linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
+   }
+   linked->Program->info.num_ssbos = num_ssbo_blocks;
+
   /* At this point linked should contain all of the linked IR, so
    * validate it to make sure nothing went wrong.
    */
@@ -2384,6 +2377,9 @@ link_intrastage_shaders(void *mem_ctx,
   if (ctx->Const.VertexID_is_zero_based)
      lower_vertex_id(linked);

+   if (ctx->Const.LowerCsDerivedVariables)
+      lower_cs_derived(linked);
+
 #ifdef DEBUG
   /* Compute the source checksum. */
   linked->SourceChecksum = 0;
@@ -2661,12 +2657,14 @@ assign_attribute_or_color_locations(void *mem_ctx,
   } to_assign[32];
   assert(max_index <= 32);

-   /* Temporary array for the set of attributes that have locations assigned.
+   /* Temporary array for the set of attributes that have locations assigned,
+    * for the purpose of checking overlapping slots/components of (non-ES)
+    * fragment shader outputs.
    */
-   ir_variable *assigned[16];
+   ir_variable *assigned[12 * 4]; /* (max # of FS outputs) * # components */
+   unsigned assigned_attr = 0;

   unsigned num_attr = 0;
-   unsigned assigned_attr = 0;

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *const var = node->as_variable();
@@ -2905,6 +2903,18 @@ assign_attribute_or_color_locations(void *mem_ctx,
               }
            }

+            if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
+               /* Only track assigned variables for non-ES fragment shaders
+                * to avoid overflowing the array.
+                *
+                * At most one variable per fragment output component should
+                * reach this.
+                */
+               assert(assigned_attr < ARRAY_SIZE(assigned));
+               assigned[assigned_attr] = var;
+               assigned_attr++;
+            }
+
            used_locations |= (use_mask << attr);

            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
@@ -2931,9 +2941,6 @@ assign_attribute_or_color_locations(void *mem_ctx,
               double_storage_locations |= (use_mask << attr);
         }

-         assigned[assigned_attr] = var;
-         assigned_attr++;
-
         continue;
      }

@@ -3613,7 +3620,7 @@ add_program_resource(struct gl_shader_program *prog,
      return true;

   prog->data->ProgramResourceList =
-      reralloc(prog,
+      reralloc(prog->data,
               prog->data->ProgramResourceList,
               gl_program_resource,
               prog->data->NumProgramResourceList + 1);
@@ -3808,6 +3815,7 @@ add_shader_variable(const struct gl_context *ctx,
                    GLenum programInterface, ir_variable *var,
                    const char *name, const glsl_type *type,
                    bool use_implicit_location, int location,
+                    bool inouts_share_location,
                    const glsl_type *outermost_struct_type = NULL)
 {
   const glsl_type *interface_type = var->get_interface_type();
@@ -3870,7 +3878,7 @@ add_shader_variable(const struct gl_context *ctx,
                                  stage_mask, programInterface,
                                  var, field_name, field->type,
                                  use_implicit_location, field_location,
-                                  outermost_struct_type))
+                                  false, outermost_struct_type))
            return false;

         field_location += field->type->count_attribute_slots(false);
@@ -3878,6 +3886,43 @@ add_shader_variable(const struct gl_context *ctx,
      return true;
   }

+   case GLSL_TYPE_ARRAY: {
+      /* The ARB_program_interface_query spec says:
+       *
+       *     "For an active variable declared as an array of basic types, a
+       *      single entry will be generated, with its name string formed by
+       *      concatenating the name of the array and the string "[0]"."
+       *
+       *     "For an active variable declared as an array of an aggregate data
+       *      type (structures or arrays), a separate entry will be generated
+       *      for each active array element, unless noted immediately below.
+       *      The name of each entry is formed by concatenating the name of
+       *      the array, the "[" character, an integer identifying the element
+       *      number, and the "]" character.  These enumeration rules are
+       *      applied recursively, treating each enumerated array element as a
+       *      separate active variable."
+       */
+      const struct glsl_type *array_type = type->fields.array;
+      if (array_type->base_type == GLSL_TYPE_STRUCT ||
+          array_type->base_type == GLSL_TYPE_ARRAY) {
+         unsigned elem_location = location;
+         unsigned stride = inouts_share_location ? 0 :
+                           array_type->count_attribute_slots(false);
+         for (unsigned i = 0; i < type->length; i++) {
+            char *elem = ralloc_asprintf(shProg, "%s[%d]", name, i);
+            if (!add_shader_variable(ctx, shProg, resource_set,
+                                     stage_mask, programInterface,
+                                     var, elem, array_type,
+                                     use_implicit_location, elem_location,
+                                     false, outermost_struct_type))
+               return false;
+            elem_location += stride;
+         }
+         return true;
+      }
+      /* fallthrough */
+   }
+
   default: {
      /* The ARB_program_interface_query spec says:
       *
@@ -3898,6 +3943,20 @@ add_shader_variable(const struct gl_context *ctx,
   }
 }

+static bool
+inout_has_same_location(const ir_variable *var, unsigned stage)
+{
+   if (!var->data.patch &&
+       ((var->data.mode == ir_var_shader_out &&
+         stage == MESA_SHADER_TESS_CTRL) ||
+        (var->data.mode == ir_var_shader_in &&
+         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+          stage == MESA_SHADER_GEOMETRY))))
+      return true;
+   else
+      return false;
+}
+
 static bool
 add_interface_variables(const struct gl_context *ctx,
                        struct gl_shader_program *shProg,
@@ -3954,7 +4013,8 @@ add_interface_variables(const struct gl_context *ctx,
      if (!add_shader_variable(ctx, shProg, resource_set,
                               1 << stage, programInterface,
                               var, var->name, var->type, vs_input_or_fs_output,
-                               var->data.location - loc_bias))
+                               var->data.location - loc_bias,
+                               inout_has_same_location(var, stage)))
         return false;
   }
   return true;
@@ -3992,7 +4052,8 @@ add_packed_varyings(const struct gl_context *ctx,
            if (!add_shader_variable(ctx, shProg, resource_set,
                                     stage_mask,
                                     iface, var, var->name, var->type, false,
-                                     var->data.location - VARYING_SLOT_VAR0))
+                                     var->data.location - VARYING_SLOT_VAR0,
+                                     inout_has_same_location(var, stage)))
               return false;
         }
      }
@@ -4018,7 +4079,8 @@ add_fragdata_arrays(const struct gl_context *ctx,
         if (!add_shader_variable(ctx, shProg, resource_set,
                                  1 << MESA_SHADER_FRAGMENT,
                                  GL_PROGRAM_OUTPUT, var, var->name, var->type,
-                                  true, var->data.location - FRAG_RESULT_DATA0))
+                                  true, var->data.location - FRAG_RESULT_DATA0,
+                                  false))
            return false;
      }
   }
@@ -4581,14 +4643,12 @@ link_and_validate_uniforms(struct gl_context *ctx,
   update_array_sizes(prog);
   link_assign_uniform_locations(prog, ctx);

-   if (!prog->data->cache_fallback) {
-      link_assign_atomic_counter_resources(ctx, prog);
-      link_calculate_subroutine_compat(prog);
-      check_resources(ctx, prog);
-      check_subroutine_resources(prog);
-      check_image_resources(ctx, prog);
-      link_check_atomic_counter_resources(ctx, prog);
-   }
+   link_assign_atomic_counter_resources(ctx, prog);
+   link_calculate_subroutine_compat(prog);
+   check_resources(ctx, prog);
+   check_subroutine_resources(prog);
+   check_image_resources(ctx, prog);
+   link_check_atomic_counter_resources(ctx, prog);
 }

 static bool
@@ -4902,10 +4962,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
      last = i;
   }

-   if (!prog->data->cache_fallback) {
-      check_explicit_uniform_locations(ctx, prog);
-      link_assign_subroutine_types(prog);
-   }
+   check_explicit_uniform_locations(ctx, prog);
+   link_assign_subroutine_types(prog);

   if (!prog->data->LinkStatus)
      goto done;
@@ -4960,15 +5018,13 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
   if (prog->SeparateShader)
      disable_varying_optimizations_for_sso(prog);

-   if (!prog->data->cache_fallback) {
-      /* Process UBOs */
-      if (!interstage_cross_validate_uniform_blocks(prog, false))
-         goto done;
+   /* Process UBOs */
+   if (!interstage_cross_validate_uniform_blocks(prog, false))
+      goto done;

-      /* Process SSBOs */
-      if (!interstage_cross_validate_uniform_blocks(prog, true))
-         goto done;
-   }
+   /* Process SSBOs */
+   if (!interstage_cross_validate_uniform_blocks(prog, true))
+      goto done;

   /* Do common optimization before assigning storage for attributes,
    * uniforms, and varyings.  Later optimization could possibly make
--- a/src/compiler/glsl/lower_cs_derived.cpp
+++ b/src/compiler/glsl/lower_cs_derived.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2017 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_cs_derived.cpp
+ *
+ * For hardware that does not support the gl_GlobalInvocationID and
+ * gl_LocalInvocationIndex system values, replace them with fresh
+ * globals. Note that we can't rely on gl_WorkGroupSize or
+ * gl_LocalGroupSizeARB being available, since they may only have been defined
+ * in a non-main shader.
+ *
+ * [ This can happen if only a secondary shader has the layout(local_size_*)
+ *   declaration. ]
+ *
+ * This is meant to be run post-linking.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
+#include "program/prog_statevars.h"
+#include "builtin_functions.h"
+
+using namespace ir_builder;
+
+namespace {
+/* Visitor replacing the derived CS system values with computed temporaries. */
+class lower_cs_derived_visitor : public ir_hierarchical_visitor {
+public:
+   explicit lower_cs_derived_visitor(gl_linked_shader *shader)
+      : progress(false),
+        shader(shader),
+        local_size_variable(shader->Program->info.cs.local_size_variable),
+        gl_WorkGroupSize(NULL),
+        gl_WorkGroupID(NULL),
+        gl_LocalInvocationID(NULL),
+        gl_GlobalInvocationID(NULL),
+        gl_LocalInvocationIndex(NULL)
+   {
+      main_sig = _mesa_get_main_function_signature(shader->symbols);
+      assert(main_sig); /* the initializing assignments are added to main() */
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+
+   ir_variable *add_system_value(
+         int slot, const glsl_type *type, const char *name);
+   void find_sysvals();                 /* resolves the three inputs below */
+   void make_gl_GlobalInvocationID();   /* creates the temporary on first use */
+   void make_gl_LocalInvocationIndex(); /* creates the temporary on first use */
+
+   bool progress; /* set by visit() whenever a dereference is rewritten */
+
+private:
+   gl_linked_shader *shader;        /* shader whose IR is being rewritten */
+   bool local_size_variable;        /* use gl_LocalGroupSizeARB as the size */
+   ir_function_signature *main_sig; /* main(); receives the initializations */
+
+   ir_rvalue *gl_WorkGroupSize;       /* dereference or constant; cloned per use */
+   ir_variable *gl_WorkGroupID;       /* NULL until find_sysvals() has run */
+   ir_variable *gl_LocalInvocationID; /* NULL until find_sysvals() has run */
+
+   ir_variable *gl_GlobalInvocationID;   /* replacement temporary, or NULL */
+   ir_variable *gl_LocalInvocationIndex; /* replacement temporary, or NULL */
+};
+
+} /* anonymous namespace */
+
+/* Declare an implicit, read-only system value and prepend it to the IR. */
+ir_variable *
+lower_cs_derived_visitor::add_system_value(int slot, const glsl_type *type,
+                                           const char *name)
+{
+   ir_variable *sv = new(shader) ir_variable(type, name, ir_var_system_value);
+   sv->data.location = slot;
+   sv->data.explicit_location = true;
+   sv->data.explicit_index = 0;
+   sv->data.read_only = true;
+   sv->data.how_declared = ir_var_declared_implicitly;
+   shader->ir->push_head(sv);
+   return sv;
+}
+
+/* Look up, once, the system values the derived values are built from, and
+ * re-create any the symbol table no longer has. */
+void
+lower_cs_derived_visitor::find_sysvals()
+{
+   if (gl_WorkGroupSize)
+      return;
+
+   const char *const size_name =
+      local_size_variable ? "gl_LocalGroupSizeARB" : "gl_WorkGroupSize";
+   ir_variable *const size_var = shader->symbols->get_variable(size_name);
+   if (size_var != NULL)
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(size_var);
+   gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
+   gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
+
+   /* Dead code elimination may have removed any of them, and the group size
+    * is also absent when the layout was declared in a non-main shader.
+    */
+   if (gl_WorkGroupID == NULL)
+      gl_WorkGroupID = add_system_value(
+            SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
+   if (gl_LocalInvocationID == NULL)
+      gl_LocalInvocationID = add_system_value(
+            SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
+            "gl_LocalInvocationID");
+   if (size_var != NULL)
+      return;
+
+   if (local_size_variable) {
+      ir_variable *const size_sysval = add_system_value(
+            SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
+            "gl_LocalGroupSizeARB");
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(size_sysval);
+      return;
+   }
+
+   /* Fixed local size: materialize it as a uvec3 constant. */
+   ir_constant_data size;
+   memset(&size, 0, sizeof(size));
+   for (int c = 0; c < 3; c++)
+      size.u[c] = shader->Program->info.cs.local_size[c];
+   gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &size);
+}
+
+/* Create, on first use, the temporary standing in for gl_GlobalInvocationID
+ * and initialize it at the top of main() as
+ *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+ */
+void
+lower_cs_derived_visitor::make_gl_GlobalInvocationID()
+{
+   if (gl_GlobalInvocationID)
+      return;
+
+   find_sysvals();
+   gl_GlobalInvocationID = new(shader) ir_variable(
+         glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
+   shader->ir->push_head(gl_GlobalInvocationID);
+
+   operand group_size(gl_WorkGroupSize->clone(shader, NULL));
+   ir_instruction *init = assign(
+         gl_GlobalInvocationID,
+         add(mul(gl_WorkGroupID, group_size), gl_LocalInvocationID));
+   main_sig->body.push_head(init);
+}
+
+/* Create, on first use, the temporary standing in for
+ * gl_LocalInvocationIndex and initialize it at the top of main() as
+ *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+ *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+ *    gl_LocalInvocationID.x
+ */
+void
+lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
+{
+   if (gl_LocalInvocationIndex)
+      return;
+
+   find_sysvals();
+   gl_LocalInvocationIndex = new(shader) ir_variable(
+         glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
+   shader->ir->push_head(gl_LocalInvocationIndex);
+
+   ir_expression *const z_term =
+      mul(mul(swizzle_z(gl_LocalInvocationID),
+              swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
+          swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *const y_term =
+      mul(swizzle_y(gl_LocalInvocationID),
+          swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_instruction *init = assign(
+         gl_LocalInvocationIndex,
+         add(add(y_term, z_term), swizzle_x(gl_LocalInvocationID)));
+   main_sig->body.push_head(init);
+}
+
+/* Point a dereference of either derived compute system value at the
+ * temporary that replaces it, creating that temporary on first use. */
+ir_visitor_status
+lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->data.mode != ir_var_system_value)
+      return visit_continue;
+
+   if (ir->var->data.location == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
+      make_gl_GlobalInvocationID();
+      ir->var = gl_GlobalInvocationID;
+      progress = true;
+   } else if (ir->var->data.location == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
+      make_gl_LocalInvocationIndex();
+      ir->var = gl_LocalInvocationIndex;
+      progress = true;
+   }
+   return visit_continue;
+}
+
+/* Run the lowering on a compute shader; true if anything was rewritten. */
+bool
+lower_cs_derived(gl_linked_shader *shader)
+{
+   if (shader->Stage != MESA_SHADER_COMPUTE)
+      return false;
+
+   lower_cs_derived_visitor visitor(shader);
+   visitor.run(shader->ir);
+   return visitor.progress;
+}
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -358,13 +358,21 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
    * into
    *
    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
-    *    resulting_biased_exp = extracted_biased_exp + exp;
+    *    resulting_biased_exp = min(extracted_biased_exp + exp, 255);
    *
-    *    if (resulting_biased_exp < 1 || x == 0.0f) {
-    *       return copysign(0.0, x);
+    *    if (extracted_biased_exp >= 255)
+    *       return x; // +/-inf, NaN
+    *
+    *    sign_mantissa = bitcast_f2u(x) & sign_mantissa_mask;
+    *
+    *    if (min(resulting_biased_exp, extracted_biased_exp) < 1)
+    *       resulting_biased_exp = 0;
+    *    if (resulting_biased_exp >= 255 ||
+    *        min(resulting_biased_exp, extracted_biased_exp) < 1) {
+    *       sign_mantissa &= sign_mask;
    *    }
    *
-    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+    *    return bitcast_u2f(sign_mantissa |
    *                       lshift(i2u(resulting_biased_exp), exp_shift));
    *
    * which we can't actually implement as such, since the GLSL IR doesn't
@@ -372,45 +380,58 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
    * using conditional-select:
    *
    *    extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
-    *    resulting_biased_exp = extracted_biased_exp + exp;
+    *    resulting_biased_exp = min(extracted_biased_exp + exp, 255);
    *
-    *    is_not_zero_or_underflow = logic_and(nequal(x, 0.0f),
-    *                                         gequal(resulting_biased_exp, 1);
-    *    x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
-    *    resulting_biased_exp = csel(is_not_zero_or_underflow,
-    *                                resulting_biased_exp, 0);
+    *    sign_mantissa = bitcast_f2u(x) & sign_mantissa_mask;
    *
-    *    return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
-    *                       lshift(i2u(resulting_biased_exp), exp_shift));
+    *    flush_to_zero = lequal(min(resulting_biased_exp, extracted_biased_exp), 0);
+    *    resulting_biased_exp = csel(flush_to_zero, 0, resulting_biased_exp);
+    *    zero_mantissa = logic_or(flush_to_zero,
+    *                             gequal(resulting_biased_exp, 255));
+    *    sign_mantissa = csel(zero_mantissa, sign_mantissa & sign_mask, sign_mantissa);
+    *
+    *    result = sign_mantissa |
+    *             lshift(i2u(resulting_biased_exp), exp_shift);
+    *
+    *    return csel(extracted_biased_exp >= 255, x, bitcast_u2f(result));
+    *
+    * The definition of ldexp in the GLSL spec says:
+    *
+    *    "If this product is too large to be represented in the
+    *     floating-point type, the result is undefined."
+    *
+    * However, the definition of ldexp in the GLSL ES spec does not contain
+    * this sentence, so we do need to handle overflow correctly.
+    *
+    * There is additional language limiting the defined range of exp, but this
+    * is merely to allow implementations that store 2^exp in a temporary
+    * variable.
    */

   const unsigned vec_elem = ir->type->vector_elements;

   /* Types */
   const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
+   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);

-   /* Constants */
-   ir_constant *zeroi = ir_constant::zero(ir, ivec);
-
-   ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
-
-   ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
-
   /* Temporary variables */
   ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
   ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
-
-   ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
-                                                  ir_var_temporary);
+   ir_variable *result = new(ir) ir_variable(uvec, "result", ir_var_temporary);

   ir_variable *extracted_biased_exp =
      new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
   ir_variable *resulting_biased_exp =
      new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);

-   ir_variable *is_not_zero_or_underflow =
-      new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
+   ir_variable *sign_mantissa =
+      new(ir) ir_variable(uvec, "sign_mantissa", ir_var_temporary);
+
+   ir_variable *flush_to_zero =
+      new(ir) ir_variable(bvec, "flush_to_zero", ir_var_temporary);
+   ir_variable *zero_mantissa =
+      new(ir) ir_variable(bvec, "zero_mantissa", ir_var_temporary);

   ir_instruction &i = *base_ir;

@@ -423,58 +444,82 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
   /* Extract the biased exponent from <x>. */
   i.insert_before(extracted_biased_exp);
   i.insert_before(assign(extracted_biased_exp,
-                          rshift(bitcast_f2i(abs(x)), exp_shift)));
+                          rshift(bitcast_f2i(abs(x)),
+                                 new(ir) ir_constant(23, vec_elem))));

+   /* The definition of ldexp in the GLSL 4.60 spec says:
+    *
+    *    "If exp is greater than +128 (single-precision) or +1024
+    *     (double-precision), the value returned is undefined. If exp is less
+    *     than -126 (single-precision) or -1022 (double-precision), the value
+    *     returned may be flushed to zero."
+    *
+    * So we do not have to guard against the possibility of addition overflow,
+    * which could happen when exp is close to INT_MAX. Addition underflow
+    * cannot happen (the worst case is 0 + INT_MIN, which is representable).
+    */
   i.insert_before(resulting_biased_exp);
   i.insert_before(assign(resulting_biased_exp,
-                          add(extracted_biased_exp, exp)));
+                          min2(add(extracted_biased_exp, exp),
+                               new(ir) ir_constant(255, vec_elem))));

-   /* Test if result is ±0.0, subnormal, or underflow by checking if the
-    * resulting biased exponent would be less than 0x1. If so, the result is
-    * 0.0 with the sign of x. (Actually, invert the conditions so that
-    * immediate values are the second arguments, which is better for i965)
-    */
-   i.insert_before(zero_sign_x);
-   i.insert_before(assign(zero_sign_x,
-                          bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask))));
+   i.insert_before(sign_mantissa);
+   i.insert_before(assign(sign_mantissa,
+                          bit_and(bitcast_f2u(x),
+                                  new(ir) ir_constant(0x807fffffu, vec_elem))));

-   i.insert_before(is_not_zero_or_underflow);
-   i.insert_before(assign(is_not_zero_or_underflow,
-                          logic_and(nequal(x, new(ir) ir_constant(0.0f, vec_elem)),
-                                    gequal(resulting_biased_exp,
-                                           new(ir) ir_constant(0x1, vec_elem)))));
-   i.insert_before(assign(x, csel(is_not_zero_or_underflow,
-                                  x, zero_sign_x)));
-   i.insert_before(assign(resulting_biased_exp,
-                          csel(is_not_zero_or_underflow,
-                               resulting_biased_exp, zeroi)));
-
-   /* We could test for overflows by checking if the resulting biased exponent
-    * would be greater than 0xFE. Turns out we don't need to because the GLSL
-    * spec says:
+   /* We flush to zero if the original or resulting biased exponent is <= 0,
+    * i.e. for a +/-0.0 or subnormal input, or a subnormal/underflowed output.
    *
-    *    "If this product is too large to be represented in the
-    *     floating-point type, the result is undefined."
+    * The mantissa is set to 0 if the resulting biased exponent is 255, since
+    * an overflow should produce a +/-inf result.
+    *
+    * Note that NaN inputs are handled separately.
    */
+   i.insert_before(flush_to_zero);
+   i.insert_before(assign(flush_to_zero,
+                          lequal(min2(resulting_biased_exp,
+                                      extracted_biased_exp),
+                                 ir_constant::zero(ir, ivec))));
+   i.insert_before(assign(resulting_biased_exp,
+                          csel(flush_to_zero,
+                               ir_constant::zero(ir, ivec),
+                               resulting_biased_exp)));

-   ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
+   i.insert_before(zero_mantissa);
+   i.insert_before(assign(zero_mantissa,
+                          logic_or(flush_to_zero,
+                                   equal(resulting_biased_exp,
+                                         new(ir) ir_constant(255, vec_elem)))));
+   i.insert_before(assign(sign_mantissa,
+                          csel(zero_mantissa,
+                               bit_and(sign_mantissa,
+                                       new(ir) ir_constant(0x80000000u, vec_elem)),
+                               sign_mantissa)));

   /* Don't generate new IR that would need to be lowered in an additional
    * pass.
    */
+   i.insert_before(result);
   if (!lowering(INSERT_TO_SHIFTS)) {
-      ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
-      ir->operation = ir_unop_bitcast_i2f;
-      ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
-                                        exp_shift_clone, exp_width);
-      ir->operands[1] = NULL;
+      i.insert_before(assign(result,
+                             bitfield_insert(sign_mantissa,
+                                             i2u(resulting_biased_exp),
+                                             new(ir) ir_constant(23u, vec_elem),
+                                             new(ir) ir_constant(8u, vec_elem))));
   } else {
-      ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fffffu, vec_elem);
-      ir->operation = ir_unop_bitcast_u2f;
-      ir->operands[0] = bit_or(bit_and(bitcast_f2u(x), sign_mantissa_mask),
-                               lshift(i2u(resulting_biased_exp), exp_shift_clone));
+      i.insert_before(assign(result,
+                             bit_or(sign_mantissa,
+                                    lshift(i2u(resulting_biased_exp),
+                                           new(ir) ir_constant(23, vec_elem)))));
   }

+   ir->operation = ir_triop_csel;
+   ir->operands[0] = gequal(extracted_biased_exp,
+                            new(ir) ir_constant(255, vec_elem));
+   ir->operands[1] = new(ir) ir_dereference_variable(x);
+   ir->operands[2] = bitcast_u2f(result);
+
   this->progress = true;
 }

--- a/src/compiler/glsl/lower_named_interface_blocks.cpp
+++ b/src/compiler/glsl/lower_named_interface_blocks.cpp
@@ -115,6 +115,7 @@ public:
   void run(exec_list *instructions);

   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual ir_visitor_status visit_leave(ir_expression *);
   virtual void handle_rvalue(ir_rvalue **rvalue);
 };

@@ -238,6 +239,23 @@ flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir)
   return rvalue_visit(ir);
 }

+ir_visitor_status
+flatten_named_interface_blocks_declarations::visit_leave(ir_expression *ir)
+{
+   const ir_visitor_status result = rvalue_visit(ir);
+   const bool is_interpolate_at =
+      ir->operation == ir_unop_interpolate_at_centroid ||
+      ir->operation == ir_binop_interpolate_at_offset ||
+      ir->operation == ir_binop_interpolate_at_sample;
+
+   if (is_interpolate_at) {
+      /* Marking the interpolant disables varying packing for this input. */
+      const ir_rvalue *const interpolant = ir->operands[0];
+      interpolant->variable_referenced()->data.must_be_shader_input = 1;
+   }
+   return result;
+}
+
 void
 flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
 {
--- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp
@@ -151,7 +151,36 @@ ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rv
 {
   ir_expression *const expr = ir->as_expression();

-   if (expr == NULL || expr->operation != ir_binop_vector_extract)
+   if (expr == NULL)
+      return ir;
+
+   if (expr->operation == ir_unop_interpolate_at_centroid ||
+       expr->operation == ir_binop_interpolate_at_offset ||
+       expr->operation == ir_binop_interpolate_at_sample) {
+      /* Lower interpolateAtXxx(some_vec[idx], ...) to
+       * interpolateAtXxx(some_vec, ...)[idx] before lowering to conditional
+       * assignments, to maintain the rule that the interpolant is an l-value
+       * referring to a (part of a) shader input.
+       *
+       * This is required when idx is dynamic (otherwise it gets lowered to
+       * a swizzle).
+       */
+      ir_expression *const interpolant = expr->operands[0]->as_expression();
+      if (!interpolant || interpolant->operation != ir_binop_vector_extract)
+         return ir;
+
+      ir_rvalue *vec_input = interpolant->operands[0];
+      ir_expression *const vec_interpolate =
+         new(base_ir) ir_expression(expr->operation, vec_input->type,
+                                    vec_input, expr->operands[1]);
+
+      return convert_vec_index_to_cond_assign(ralloc_parent(ir),
+                                              vec_interpolate,
+                                              interpolant->operands[1],
+                                              ir->type);
+   }
+
+   if (expr->operation != ir_binop_vector_extract)
      return ir;

   return convert_vec_index_to_cond_assign(ralloc_parent(ir),
--- a/src/compiler/glsl/opt_constant_propagation.cpp
+++ b/src/compiler/glsl/opt_constant_propagation.cpp
@@ -237,6 +237,12 @@ ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) {
      case GLSL_TYPE_BOOL:
 	 data.b[i] = found->constant->value.b[rhs_channel];
 	 break;
+      case GLSL_TYPE_UINT64:
+	 data.u64[i] = found->constant->value.u64[rhs_channel];
+	 break;
+      case GLSL_TYPE_INT64:
+	 data.i64[i] = found->constant->value.i64[rhs_channel];
+	 break;
      default:
 	 assert(!"not reached");
 	 break;
--- a/src/compiler/glsl/opt_dead_builtin_variables.cpp
+++ b/src/compiler/glsl/opt_dead_builtin_variables.cpp
@@ -62,23 +62,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
       * information, so removing these variables from the user shader will
       * cause problems later.
       *
-       * For compute shaders, gl_GlobalInvocationID has some dependencies, so
-       * we avoid removing these dependencies.
-       *
-       * We also avoid removing gl_GlobalInvocationID at this stage because it
-       * might be used by a linked shader. In this case it still needs to be
-       * initialized by the main function.
-       *
-       *    gl_GlobalInvocationID =
-       *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-       *
-       * Similarly, we initialize gl_LocalInvocationIndex in the main function:
-       *
-       *    gl_LocalInvocationIndex =
-       *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-       *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-       *       gl_LocalInvocationID.x;
-       *
       * Matrix uniforms with "Transpose" are not eliminated because there's
       * an optimization pass that can turn references to the regular matrix
       * into references to the transpose matrix.  Eliminating the transpose
@@ -90,11 +73,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
       */
      if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
          || strcmp(var->name, "gl_Vertex") == 0
-          || strcmp(var->name, "gl_WorkGroupID") == 0
-          || strcmp(var->name, "gl_WorkGroupSize") == 0
-          || strcmp(var->name, "gl_LocalInvocationID") == 0
-          || strcmp(var->name, "gl_GlobalInvocationID") == 0
-          || strcmp(var->name, "gl_LocalInvocationIndex") == 0
          || strstr(var->name, "Transpose") != NULL)
         continue;

--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -59,7 +59,7 @@
 #include "program.h"
 #include "shader_cache.h"
 #include "util/mesa-sha1.h"
-#include "util/string_to_uint_map.h"
+#include "string_to_uint_map.h"

 extern "C" {
 #include "main/enums.h"
@@ -74,11 +74,26 @@ compile_shaders(struct gl_context *ctx, struct gl_shader_program *prog) {
   }
 }

+/* Sizes of a glsl_struct_field and of its type and name pointer members. */
+static void
+get_struct_type_field_and_pointer_sizes(size_t *s_field_size,
+                                        size_t *s_field_ptrs)
+{
+   const glsl_struct_field *const field = NULL; /* only used inside sizeof */
+   *s_field_size = sizeof(*field);
+   *s_field_ptrs = sizeof(field->type) + sizeof(field->name);
+}
+
 static void
 encode_type_to_blob(struct blob *blob, const glsl_type *type)
 {
   uint32_t encoding;

+   if (!type) {
+      blob_write_uint32(blob, 0);
+      return;
+   }
+
   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
@@ -122,11 +137,18 @@ encode_type_to_blob(struct blob *blob, const glsl_type *type)
      blob_write_uint32(blob, (type->base_type) << 24);
      blob_write_string(blob, type->name);
      blob_write_uint32(blob, type->length);
-      blob_write_bytes(blob, type->fields.structure,
-                       sizeof(glsl_struct_field) * type->length);
+
+      size_t s_field_size, s_field_ptrs;
+      get_struct_type_field_and_pointer_sizes(&s_field_size, &s_field_ptrs);
+
      for (unsigned i = 0; i < type->length; i++) {
         encode_type_to_blob(blob, type->fields.structure[i].type);
         blob_write_string(blob, type->fields.structure[i].name);
+
+         /* Write the struct field skipping the pointers */
+         blob_write_bytes(blob,
+                          ((char *)&type->fields.structure[i]) + s_field_ptrs,
+                          s_field_size - s_field_ptrs);
      }

      if (type->is_interface()) {
@@ -149,6 +171,11 @@ static const glsl_type *
 decode_type_from_blob(struct blob_reader *blob)
 {
   uint32_t u = blob_read_uint32(blob);
+
+   if (u == 0) {
+      return NULL;
+   }
+
   glsl_base_type base_type = (glsl_base_type) (u >> 24);

   switch (base_type) {
@@ -182,22 +209,33 @@ decode_type_from_blob(struct blob_reader *blob)
   case GLSL_TYPE_INTERFACE: {
      char *name = blob_read_string(blob);
      unsigned num_fields = blob_read_uint32(blob);
-      glsl_struct_field *fields = (glsl_struct_field *)
-         blob_read_bytes(blob, sizeof(glsl_struct_field) * num_fields);
+
+      size_t s_field_size, s_field_ptrs;
+      get_struct_type_field_and_pointer_sizes(&s_field_size, &s_field_ptrs);
+
+      glsl_struct_field *fields =
+         (glsl_struct_field *) malloc(s_field_size * num_fields);
      for (unsigned i = 0; i < num_fields; i++) {
         fields[i].type = decode_type_from_blob(blob);
         fields[i].name = blob_read_string(blob);
+
+         blob_copy_bytes(blob, ((uint8_t *) &fields[i]) + s_field_ptrs,
+                         s_field_size - s_field_ptrs);
      }

+      const glsl_type *t;
      if (base_type == GLSL_TYPE_INTERFACE) {
         enum glsl_interface_packing packing =
            (glsl_interface_packing) blob_read_uint32(blob);
         bool row_major = blob_read_uint32(blob);
-         return glsl_type::get_interface_instance(fields, num_fields,
-                                                  packing, row_major, name);
+         t = glsl_type::get_interface_instance(fields, num_fields, packing,
+                                               row_major, name);
      } else {
-         return glsl_type::get_record_instance(fields, num_fields, name);
+         t = glsl_type::get_record_instance(fields, num_fields, name);
      }
+
+      free(fields);
+      return t;
   }
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
@@ -555,6 +593,17 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
                      MAX_FEEDBACK_BUFFERS);
 }

+/* True when uniform idx is not a builtin, is not shader storage, and has
+ * block_index == -1. The uniform (de)serializers in this file store a
+ * data-slot offset only for uniforms that pass this test. */
+static bool
+has_uniform_storage(struct gl_shader_program *prog, unsigned idx)
+{
+   const struct gl_uniform_storage *u = &prog->data->UniformStorage[idx];
+
+   return !u->builtin && !u->is_shader_storage && u->block_index == -1;
+}
+
 static void
 write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
 {
@@ -566,8 +615,6 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
      encode_type_to_blob(metadata, prog->data->UniformStorage[i].type);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].array_elements);
      blob_write_string(metadata, prog->data->UniformStorage[i].name);
-      blob_write_uint32(metadata, prog->data->UniformStorage[i].storage -
-                                  prog->data->UniformDataSlots);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].builtin);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].remap_location);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].block_index);
@@ -586,6 +633,12 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
                        prog->data->UniformStorage[i].top_level_array_size);
      blob_write_uint32(metadata,
                        prog->data->UniformStorage[i].top_level_array_stride);
+      /* A storage offset is recorded only when has_uniform_storage() holds. */
+      if (has_uniform_storage(prog, i)) {
+         blob_write_uint32(metadata, prog->data->UniformStorage[i].storage -
+                                     prog->data->UniformDataSlots);
+      }
+
      blob_write_bytes(metadata, prog->data->UniformStorage[i].opaque,
                       sizeof(prog->data->UniformStorage[i].opaque));
   }
@@ -597,9 +650,7 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
    */
   blob_write_uint32(metadata, prog->data->NumHiddenUniforms);
   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
-      if (!prog->data->UniformStorage[i].builtin &&
-          !prog->data->UniformStorage[i].is_shader_storage &&
-          prog->data->UniformStorage[i].block_index == -1) {
+      if (has_uniform_storage(prog, i)) {
         unsigned vec_size =
            prog->data->UniformStorage[i].type->component_slots() *
            MAX2(prog->data->UniformStorage[i].array_elements, 1);
@@ -619,7 +670,7 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
   prog->data->NumUniformStorage = blob_read_uint32(metadata);
   prog->data->NumUniformDataSlots = blob_read_uint32(metadata);

-   uniforms = rzalloc_array(prog, struct gl_uniform_storage,
+   uniforms = rzalloc_array(prog->data, struct gl_uniform_storage,
                            prog->data->NumUniformStorage);
   prog->data->UniformStorage = uniforms;

@@ -633,7 +684,6 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
      uniforms[i].type = decode_type_from_blob(metadata);
      uniforms[i].array_elements = blob_read_uint32(metadata);
      uniforms[i].name = ralloc_strdup(prog, blob_read_string (metadata));
-      uniforms[i].storage = data + blob_read_uint32(metadata);
      uniforms[i].builtin = blob_read_uint32(metadata);
      uniforms[i].remap_location = blob_read_uint32(metadata);
      uniforms[i].block_index = blob_read_uint32(metadata);
@@ -651,6 +701,10 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
      uniforms[i].top_level_array_stride = blob_read_uint32(metadata);
      prog->UniformHash->put(i, uniforms[i].name);

+      if (has_uniform_storage(prog, i)) {
+         uniforms[i].storage = data + blob_read_uint32(metadata);
+      }
+
      memcpy(uniforms[i].opaque,
             blob_read_bytes(metadata, sizeof(uniforms[i].opaque)),
             sizeof(uniforms[i].opaque));
@@ -659,9 +713,7 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
   /* Restore uniform values. */
   prog->data->NumHiddenUniforms = blob_read_uint32(metadata);
   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
-      if (!prog->data->UniformStorage[i].builtin &&
-          !prog->data->UniformStorage[i].is_shader_storage &&
-          prog->data->UniformStorage[i].block_index == -1) {
+      if (has_uniform_storage(prog, i)) {
         unsigned vec_size =
            prog->data->UniformStorage[i].type->component_slots() *
            MAX2(prog->data->UniformStorage[i].array_elements, 1);
@@ -867,6 +919,18 @@ write_shader_subroutine_index(struct blob *metadata,
   }
 }

+static void
+get_shader_var_and_pointer_sizes(size_t *s_var_size, size_t *s_var_ptrs,
+                                 const gl_shader_variable *var)
+{
+   *s_var_size = sizeof(gl_shader_variable);
+   *s_var_ptrs =
+      sizeof(var->type) +
+      sizeof(var->interface_type) +
+      sizeof(var->outermost_struct_type) +
+      sizeof(var->name);
+}
+
 static void
 write_program_resource_data(struct blob *metadata,
                            struct gl_shader_program *prog,
@@ -878,16 +942,19 @@ write_program_resource_data(struct blob *metadata,
   case GL_PROGRAM_INPUT:
   case GL_PROGRAM_OUTPUT: {
      const gl_shader_variable *var = (gl_shader_variable *)res->Data;
-      blob_write_bytes(metadata, var, sizeof(gl_shader_variable));
+
      encode_type_to_blob(metadata, var->type);
-
-      if (var->interface_type)
-         encode_type_to_blob(metadata, var->interface_type);
-
-      if (var->outermost_struct_type)
-         encode_type_to_blob(metadata, var->outermost_struct_type);
+      encode_type_to_blob(metadata, var->interface_type);
+      encode_type_to_blob(metadata, var->outermost_struct_type);

      blob_write_string(metadata, var->name);
+
+      size_t s_var_size, s_var_ptrs;
+      get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var);
+
+      /* Write gl_shader_variable skipping over the pointers */
+      blob_write_bytes(metadata, ((char *)var) + s_var_ptrs,
+                       s_var_size - s_var_ptrs);
      break;
   }
   case GL_UNIFORM_BLOCK:
@@ -978,17 +1045,18 @@ read_program_resource_data(struct blob_reader *metadata,
   case GL_PROGRAM_OUTPUT: {
      gl_shader_variable *var = ralloc(prog, struct gl_shader_variable);

-      blob_copy_bytes(metadata, (uint8_t *) var, sizeof(gl_shader_variable));
      var->type = decode_type_from_blob(metadata);
-
-      if (var->interface_type)
-         var->interface_type = decode_type_from_blob(metadata);
-
-      if (var->outermost_struct_type)
-         var->outermost_struct_type = decode_type_from_blob(metadata);
+      var->interface_type = decode_type_from_blob(metadata);
+      var->outermost_struct_type = decode_type_from_blob(metadata);

      var->name = ralloc_strdup(prog, blob_read_string(metadata));

+      size_t s_var_size, s_var_ptrs;
+      get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var);
+
+      blob_copy_bytes(metadata, ((uint8_t *) var) + s_var_ptrs,
+                      s_var_size - s_var_ptrs);
+
      res->Data = var;
      break;
   }
@@ -1058,7 +1126,7 @@ read_program_resource_list(struct blob_reader *metadata,
   prog->data->NumProgramResourceList = blob_read_uint32(metadata);

   prog->data->ProgramResourceList =
-      ralloc_array(prog, gl_program_resource,
+      ralloc_array(prog->data, gl_program_resource,
                   prog->data->NumProgramResourceList);

   for (unsigned i = 0; i < prog->data->NumProgramResourceList; i++) {
@@ -1148,18 +1216,20 @@ write_shader_metadata(struct blob *metadata, gl_linked_shader *shader)
   blob_write_bytes(metadata, glprog->sh.ImageUnits,
                    sizeof(glprog->sh.ImageUnits));

+   size_t ptr_size = sizeof(GLvoid *);
+
   blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers);
   blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler);
   for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
      blob_write_bytes(metadata, &glprog->sh.BindlessSamplers[i],
-                       sizeof(struct gl_bindless_sampler));
+                       sizeof(struct gl_bindless_sampler) - ptr_size);
   }

   blob_write_uint32(metadata, glprog->sh.NumBindlessImages);
   blob_write_uint32(metadata, glprog->sh.HasBoundBindlessImage);
   for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
      blob_write_bytes(metadata, &glprog->sh.BindlessImages[i],
-                       sizeof(struct gl_bindless_image));
+                       sizeof(struct gl_bindless_image) - ptr_size);
   }

   write_shader_parameters(metadata, glprog->Parameters);
@@ -1187,6 +1257,8 @@ read_shader_metadata(struct blob_reader *metadata,
   blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits,
                   sizeof(glprog->sh.ImageUnits));

+   size_t ptr_size = sizeof(GLvoid *);
+
   glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata);
   glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata);
   if (glprog->sh.NumBindlessSamplers > 0) {
@@ -1196,7 +1268,7 @@ read_shader_metadata(struct blob_reader *metadata,

      for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
         blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessSamplers[i],
-                         sizeof(struct gl_bindless_sampler));
+                         sizeof(struct gl_bindless_sampler) - ptr_size);
      }
   }

@@ -1209,7 +1281,7 @@ read_shader_metadata(struct blob_reader *metadata,

      for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
         blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessImages[i],
-                        sizeof(struct gl_bindless_image));
+                        sizeof(struct gl_bindless_image) - ptr_size);
      }
   }

@@ -1224,6 +1296,14 @@ create_binding_str(const char *key, unsigned value, void *closure)
   ralloc_asprintf_append(bindings_str, "%s:%u,", key, value);
 }

+static void
+get_shader_info_and_pointer_sizes(size_t *s_info_size, size_t *s_info_ptrs,
+                                  shader_info *info)
+{
+   *s_info_size = sizeof(shader_info);
+   *s_info_ptrs = sizeof(info->name) + sizeof(info->label);
+}
+
 static void
 create_linked_shader_and_program(struct gl_context *ctx,
                                 gl_shader_stage stage,
@@ -1242,12 +1322,16 @@ create_linked_shader_and_program(struct gl_context *ctx,

   read_shader_metadata(metadata, glprog, linked);

+   glprog->info.name = ralloc_strdup(glprog, blob_read_string(metadata));
+   glprog->info.label = ralloc_strdup(glprog, blob_read_string(metadata));
+
+   size_t s_info_size, s_info_ptrs;
+   get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
+                                     &glprog->info);
+
   /* Restore shader info */
-   blob_copy_bytes(metadata, (uint8_t *) &glprog->info, sizeof(shader_info));
-   if (glprog->info.name)
-      glprog->info.name = ralloc_strdup(glprog, blob_read_string(metadata));
-   if (glprog->info.label)
-      glprog->info.label = ralloc_strdup(glprog, blob_read_string(metadata));
+   blob_copy_bytes(metadata, ((uint8_t *) &glprog->info) + s_info_ptrs,
+                   s_info_size - s_info_ptrs);

   _mesa_reference_shader_program_data(ctx, &glprog->sh.data, prog->data);
   _mesa_reference_program(ctx, &linked->Program, glprog);
@@ -1286,14 +1370,24 @@ shader_cache_write_program_metadata(struct gl_context *ctx,
      if (sh) {
         write_shader_metadata(metadata, sh);

-         /* Store nir shader info */
-         blob_write_bytes(metadata, &sh->Program->info, sizeof(shader_info));
-
         if (sh->Program->info.name)
            blob_write_string(metadata, sh->Program->info.name);
+         else
+            blob_write_string(metadata, "");

         if (sh->Program->info.label)
            blob_write_string(metadata, sh->Program->info.label);
+         else
+            blob_write_string(metadata, "");
+
+         size_t s_info_size, s_info_ptrs;
+         get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
+                                           &sh->Program->info);
+
+         /* Store shader info */
+         blob_write_bytes(metadata,
+                          ((char *) &sh->Program->info) + s_info_ptrs,
+                          s_info_size - s_info_ptrs);
      }
   }

@@ -1339,7 +1433,7 @@ shader_cache_read_program_metadata(struct gl_context *ctx,
      return false;

   struct disk_cache *cache = ctx->Cache;
-   if (!cache || prog->data->cache_fallback || prog->data->skip_cache)
+   if (!cache || prog->data->skip_cache)
      return false;

   /* Include bindings when creating sha1. These bindings change the resulting
--- a/src/compiler/glsl/standalone.cpp
+++ b/src/compiler/glsl/standalone.cpp
@@ -36,7 +36,7 @@
 #include "loop_analysis.h"
 #include "standalone_scaffolding.h"
 #include "standalone.h"
-#include "util/string_to_uint_map.h"
+#include "string_to_uint_map.h"
 #include "util/set.h"
 #include "linker.h"
 #include "glsl_parser_extras.h"
--- a/src/compiler/glsl/string_to_uint_map.cpp
+++ b/src/compiler/glsl/string_to_uint_map.cpp
--- a/src/compiler/glsl/string_to_uint_map.h
+++ b/src/compiler/glsl/string_to_uint_map.h
--- a/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp
+++ b/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp
@@ -25,7 +25,7 @@
 #include "main/mtypes.h"
 #include "main/macros.h"
 #include "util/ralloc.h"
-#include "util/string_to_uint_map.h"
+#include "string_to_uint_map.h"
 #include "uniform_initializer_utils.h"

 namespace linker {
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1975,10 +1975,10 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
-   case SYSTEM_VALUE_SUBGROUP_SIZE:
-      return nir_intrinsic_load_subgroup_size;
-   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
-      return nir_intrinsic_load_subgroup_invocation;
+   case nir_intrinsic_load_subgroup_size:
+      return SYSTEM_VALUE_SUBGROUP_SIZE;
+   case nir_intrinsic_load_subgroup_invocation:
+      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -41,9 +41,9 @@
 #include "compiler/shader_info.h"
 #include <stdio.h>

-#ifdef DEBUG
+#ifndef NDEBUG
 #include "util/debug.h"
-#endif /* DEBUG */
+#endif /* NDEBUG */

 #include "nir_opcodes.h"

@@ -1204,7 +1204,6 @@ typedef struct {
    *    - nir_texop_txf_ms
    *    - nir_texop_txs
    *    - nir_texop_lod
-    *    - nir_texop_tg4
    *    - nir_texop_query_levels
    *    - nir_texop_texture_samples
    *    - nir_texop_samples_identical
@@ -2299,7 +2298,7 @@ nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
 nir_deref *nir_deref_clone(const nir_deref *deref, void *mem_ctx);
 nir_deref_var *nir_deref_var_clone(const nir_deref_var *deref, void *mem_ctx);

-#ifdef DEBUG
+#ifndef NDEBUG
 void nir_validate_shader(nir_shader *shader);
 void nir_metadata_set_validation_flag(nir_shader *shader);
 void nir_metadata_check_validation_flag(nir_shader *shader);
@@ -2329,7 +2328,7 @@ static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void)
 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
 static inline bool should_clone_nir(void) { return false; }
 static inline bool should_print_nir(void) { return false; }
-#endif /* DEBUG */
+#endif /* NDEBUG */

 #define _PASS(nir, do_pass) do {                                     \
   do_pass                                                           \
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -121,9 +121,9 @@ BARRIER(memory_barrier_shared)
 INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)

 /** ARB_shader_group_vote intrinsics */
-INTRINSIC(vote_any, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(vote_all, 1, ARR(1), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(vote_eq,  1, ARR(1), true, 1, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(vote_any, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(vote_all, 1, ARR(1), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(vote_eq,  1, ARR(1), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)

 /**
 * Basic Geometry Shader intrinsics.
@@ -433,7 +433,7 @@ INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
 /* src[] = { buffer_index, offset }. No const_index */
 LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base, component } */
-LOAD(output, 1, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { vertex, offset }. const_index[] = { base, component } */
 LOAD(per_vertex_output, 2, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base } */
--- a/src/compiler/nir/nir_metadata.c
+++ b/src/compiler/nir/nir_metadata.c
@@ -59,7 +59,7 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
   impl->valid_metadata &= preserved;
 }

-#ifdef DEBUG
+#ifndef NDEBUG
 /**
 * Make sure passes properly invalidate metadata (part 1).
 *
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -308,7 +308,7 @@ for (unsigned bit = 0; bit < 32; bit++) {

 unop_convert("ufind_msb", tint32, tuint32, """
 dst = -1;
-for (int bit = 31; bit > 0; bit--) {
+for (int bit = 31; bit >= 0; bit--) {
   if ((src0 >> bit) & 1) {
      dst = bit;
      break;
@@ -717,12 +717,12 @@ opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
 unsigned base = src0, insert = src1;
 int offset = src2, bits = src3;
 if (bits == 0) {
-   dst = 0;
+   dst = base;
 } else if (offset < 0 || bits < 0 || bits + offset > 32) {
   dst = 0;
 } else {
   unsigned mask = ((1ull << bits) - 1) << offset;
-   dst = (base & ~mask) | ((insert << bits) & mask);
+   dst = (base & ~mask) | ((insert << offset) & mask);
 }
 """)

--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -250,8 +250,8 @@ optimizations = [
   (('ishr', a, 0), a),
   (('ushr', 0, a), 0),
   (('ushr', a, 0), a),
-   (('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
-   (('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
+   (('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
+   (('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
   # Exponential/logarithmic identities
   (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
   (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -28,6 +28,26 @@
 * \file nir_opt_intrinsics.c
 */

+static nir_ssa_def *
+high_subgroup_mask(nir_builder *b,
+                   nir_ssa_def *count,
+                   uint64_t base_mask)
+{
+   /* group_mask could probably be calculated more efficiently but we want to
+    * be sure not to shift by 64 if the subgroup size is 64 because the GLSL
+    * shift operator is undefined in that case. In any case if we were worried
+    * about efficiency this should probably be done further down because the
+    * subgroup size is likely to be known at compile time.
+    */
+   nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
+   nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
+   nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
+   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
+   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
+
+   return nir_iand(b, higher_bits, group_mask);
+}
+
 static bool
 opt_intrinsics_impl(nir_function_impl *impl)
 {
@@ -95,10 +115,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
               replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
               break;
            case nir_intrinsic_load_subgroup_ge_mask:
-               replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
+               replacement = high_subgroup_mask(&b, count, ~0ull);
               break;
            case nir_intrinsic_load_subgroup_gt_mask:
-               replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
+               replacement = high_subgroup_mask(&b, count, ~1ull);
               break;
            case nir_intrinsic_load_subgroup_le_mask:
               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -35,7 +35,7 @@
 /* Since this file is just a pile of asserts, don't bother compiling it if
 * we're not building a debug build.
 */
-#ifdef DEBUG
+#ifndef NDEBUG

 /*
 * Per-register validation state.
--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -32,14 +32,14 @@ extern "C" {
 #endif

 typedef struct shader_info {
-   /** The shader stage, such as MESA_SHADER_VERTEX. */
-   gl_shader_stage stage;
-
   const char *name;

   /* Descriptive name provided by the client; may be NULL */
   const char *label;

+   /** The shader stage, such as MESA_SHADER_VERTEX. */
+   gl_shader_stage stage;
+
   /* Number of textures used by this shader */
   unsigned num_textures;
   /* Number of uniform buffers used by this shader */
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -721,7 +721,7 @@ translate_image_format(SpvImageFormat format)
   case SpvImageFormatRg32ui:       return 0x823C; /* GL_RG32UI */
   case SpvImageFormatRg16ui:       return 0x823A; /* GL_RG16UI */
   case SpvImageFormatRg8ui:        return 0x8238; /* GL_RG8UI */
-   case SpvImageFormatR16ui:        return 0x823A; /* GL_RG16UI */
+   case SpvImageFormatR16ui:        return 0x8234; /* GL_R16UI */
   case SpvImageFormatR8ui:         return 0x8232; /* GL_R8UI */
   default:
      assert(!"Invalid image format");
@@ -1490,6 +1490,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
      struct vtn_value *val =
         vtn_push_value(b, w[2], vtn_value_type_sampled_image);
      val->sampled_image = ralloc(b, struct vtn_sampled_image);
+      val->sampled_image->type =
+         vtn_value(b, w[1], vtn_value_type_type)->type;
      val->sampled_image->image =
         vtn_value(b, w[3], vtn_value_type_pointer)->pointer;
      val->sampled_image->sampler =
@@ -1516,16 +1518,12 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
      sampled = *sampled_val->sampled_image;
   } else {
      assert(sampled_val->value_type == vtn_value_type_pointer);
+      sampled.type = sampled_val->pointer->type;
      sampled.image = NULL;
      sampled.sampler = sampled_val->pointer;
   }

-   const struct glsl_type *image_type;
-   if (sampled.image) {
-      image_type = sampled.image->var->var->interface_type;
-   } else {
-      image_type = sampled.sampler->var->var->interface_type;
-   }
+   const struct glsl_type *image_type = sampled.type->type;
   const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
   const bool is_array = glsl_sampler_type_is_array(image_type);
   const bool is_shadow = glsl_sampler_type_is_shadow(image_type);
@@ -1757,6 +1755,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
+   case nir_texop_tg4:
      /* These operations require a sampler */
      instr->sampler = nir_deref_var_clone(sampler, instr);
      break;
@@ -1764,7 +1763,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_lod:
-   case nir_texop_tg4:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
@@ -2034,6 +2032,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
   case SpvOpAtomicIDecrement:
   case SpvOpAtomicExchange:
   case SpvOpAtomicIAdd:
+   case SpvOpAtomicISub:
   case SpvOpAtomicSMin:
   case SpvOpAtomicUMin:
   case SpvOpAtomicSMax:
@@ -2801,7 +2800,8 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,

   case SpvOpMemoryModel:
      assert(w[1] == SpvAddressingModelLogical);
-      assert(w[2] == SpvMemoryModelGLSL450);
+      assert(w[2] == SpvMemoryModelSimple ||
+             w[2] == SpvMemoryModelGLSL450);
      break;

   case SpvOpEntryPoint: {
--- a/Show More
+++ b/Show More