docs: add sha256 checksums for 17.3.9

Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
docs: add release notes for 17.3.9
2018-04-18 09:39:48 +00:00 · 2018-04-18 08:40:26 +00:00 · 2018-04-18 08:32:35 +00:00 · 2018-04-18 09:59:46 +02:00 · 2018-04-18 09:59:46 +02:00 · 2018-04-12 00:42:54 +02:00
320 changed files with 7413 additions and 2759 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -364,38 +364,6 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
-    - env:
-        - LABEL="meson Vulkan"
-        - BUILD=meson
-        - MESON_OPTIONS="-Ddri-drivers= -Dgallium-drivers="
-      addons:
-        apt:
-          sources:
-            - llvm-toolchain-trusty-3.9
-          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            # From sources above
-            - llvm-3.9-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libelf-dev
-            - python3-pip
-    - env:
-        - LABEL="meson loaders/classic DRI"
-        - BUILD=meson
-        - MESON_OPTIONS="-Dvulkan-drivers= -Dgallium-drivers="
-      addons:
-        apt:
-          packages:
-            - xz-utils
-            - x11proto-xf86vidmode-dev
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libxdamage-dev
-            - libxfixes-dev
-            - python3-pip

 install:
  - pip install --user mako
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -65,6 +65,7 @@ LOCAL_CFLAGS += \
 	-DHAVE_PTHREAD=1 \
 	-DHAVE_DLADDR \
 	-DHAVE_DL_ITERATE_PHDR \
+	-DHAVE_ENDIAN_H \
 	-DMAJOR_IN_SYSMACROS \
 	-fvisibility=hidden \
 	-Wno-sign-compare
--- a/2
+++ b/2
@@ -1 +1 @@
-17.3.0-devel
+17.3.9
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,185 @@
+# fixes:  The commit addresses Meson which is explicitly disabled for 17.3
+ab0809e5529725bd0af6f7b6ce06415020b9d32e meson: fix strtof locale support check
+
+# fixes:  The commit addresses Meson which is explicitly disabled for 17.3
+44fbbd6fd07e5784b05e08e762e54b6c71f95ab1 util: add mesa-sha1 test to meson
+
+# stable: The commit addresses earlier commit 6132992cdb which did not land in
+#         branch
+3d2b157e23c9d66df97d59be6efd1098878cc110 i965/fs: Use UW types when using V immediates
+
+# extra: The commit just references a fix for an additional change in its v2.
+c1ff99fd70cd2ceb2cac4723e4fd5efc93834746 main: Clear shader program data whenever ProgramBinary is called
+
+# fixes: The commit addresses earlier commits 40a01c9a0ef and 8d745abc009 which
+#        did not land in branch
+9b0223046668593deb9c0be0b557994bb5218788 egl: pass the dri2_dpy to the $plat_teardown functions
+
+# fixes: The commit addresses earlier commit d50937f137 which did not land in
+#        branch
+78a8b73e7d45f55ced98a148b26247d91f4e0171 vulkan/wsi: free cmd pools
+
+# stable: The commit addresses earlier commit 6d87500fe12 which did not land in
+#         branch
+525b4f7548462bfc2e82f2d1f04f61ce6854a3c5 i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext
+
+# stable: The commit depends on earlier commit a4be2bcee2 which did not land in
+#         branch
+a29d63ecf71546c4798c609e37810f0ec81793d8 swr: refactor swr_create_screen to allow for proper cleanup on error
+
+# stable: Explicit 18.0 only nominations
+4b69ba381766cd911eb1284f1b0332a139ec8a75 anv/pipeline: Don't assert on more than 32 samplers
+bc0a21e34811e0e1542236dbaf5fb1fa56bbb98c anv/cmd_state: Drop the scratch_size field
+d6c9a89d1324ed2c723cbd3c6d8390691c58dfd2 anv/cmd_buffer: Get rid of the meta query workaround
+cd3feea74582cea2d18306d167609f4fbe681bb3 anv/cmd_buffer: Rework anv_cmd_state_reset
+ddc2d285484a1607f79ffeb2fc6c09367c6aea1f anv/cmd_buffer: Use some pre-existing pipeline temporaries
+9af5379228d7be9c7ea41e0912a8770d28ead92b anv/cmd_buffer: Add substructs to anv_cmd_state for graphics and compute
+d5592e2fdaa9ce8b98d38b2d29e2a7d2c4abda08 anv: Remove semicolons from vk_error[f] definitions
+90cceaa9dd3b12e039a131a50c6866dce04e7fb2 anv/cmd_buffer: Refactor ensure_push_descriptor_set
+b9e1ca16f84016f1d40efa9bfee89db48a7702b4 anv/cmd_buffer: Add a helper for binding descriptor sets
+31b2144c836485ef6476bd455f1c02b96deafab7 anv/cmd_buffer: Use anv_descriptor_for_binding for samplers
+97f96610c8b858267c121c0ad6ffc630e2aafc09 anv: Separate compute and graphics descriptor sets
+e85aaec1489b00f24ebef4ae5b1da598091275e1 anv/cmd_buffer: Move dirty bits into anv_cmd_*_state
+8bd5ec5b862333c936426ff18d093d07dd006182 anv/cmd_buffer: Move vb_dirty bits into anv_cmd_graphics_state
+24caee8975355a2b54b41c484ff3c897e1911760 anv/cmd_buffer: Use a temporary variable for dynamic state
+95ff2322948692f5f7b1d444aabe878fba53304c anv/cmd_buffer: Move dynamic state to graphics state
+38ec78049f69821091a2d42b0f457a1b044d4273 anv/cmd_buffer: Move num_workgroups to compute state
+4064fe59e7144fa822568543cfcc043387645d4e anv/cmd_buffer: Move gen7 index buffer state to graphics state
+
+# fixes: The commit requires earlier commit 49d035122ee which did not land in
+#        branch
+766589d89a211e67f313e8cb38f2d05b09975f96 radv: fix sample_mask_in loading. (v3.1)
+
+# stable: The commits address the Meson build that is explicitly disabled in
+#         branch
+c38c60a63c63b02d1030c6c349aa0a73105e10eb meson: fix BSD build
+5781c3d1db4a01e77f416c1685025c4d830ae87d meson: correctly set SYSCONFDIR for loading dirrc
+7c8cfe2d59bfc0dbf718a74b08b6dceaa84f7242 meson: fix missing dependencies
+53f9131205a63fa8b282ab2a7e96c48209447da0 meson: fix getting cflags from pkg-config
+8fae5eddd9982f4586d76471d0196befeb46de24 meson: handle LLVM 'x.x.xgit-revision' versions
+
+# stable: The commit requires earlier commit 01ab218bbc which did not land in
+#        branch
+0e879aad2fd1dac102c13d680edf455aa068d5df swr/rast: support llvm 3.9 type declarations
+
+# stable: The commit requires earlier commit 41c36c45 which did not land in
+#        branch
+49b0a140a731069e0e4959c65bfd1b597a4fb141 ac/nir: set amdgpu.uniform and invariant.load for UBOs
+
+# stable: The commits address gen10 support which is missing in branch
+ca19ee33d7d39cb89d948b1c983763065975ce5b i965/gen10: Ignore push constant packets during context restore.
+78c125af3904c539ea69bec2dd9fdf7a5162854f anv/gen10: Ignore push constant packets during context restore.
+bcfd78e4489f538e34138269650fc6cbe8c9d75f i965/gen10: Re-enable push constants.
+
+# stable: The commits are explicit 18.0 nominations
+17423c993d0b083c7a77a404b85788687f5efe36 winsys/amdgpu: fix assertion failure with UVD and VCE rings
+e0e23ea69cab23b9193b1e7c568fd23fc7073071 r600/eg: construct proper rat mask for image/buffers.
+
+# stable: The commits address the initial shader cache support which did not land in branch
+28db950b51274ce296cd625db62abe935d1e4ed9 i965: fix prog_data leak in brw_disk_cache
+b99c88037bf64b033579f237ec287857c53b0ad6 i965: fix disk_cache leak when destroying context
+
+# stable: The commit covers nir serialise, which did not land in branch
+d0343bef6680cc660ba691bbed31a2a1b7449f79 nir: mark unused space in packed_tex_data
+
+# stable: The KHX extension is disabled altogether in the stable branches.
+bee9270853c34aa8e4b3d19a125608ee67c87b86 radv: Don't expose VK_KHX_multiview on android.
+
+# fixes: The commit addresses the meson build, which is disabled in branch
+4a0bab1d7f942ad0ac9b98ab34e6a9e4694f3c04 meson: libdrm shouldn't appear in Requires.private: if it wasn't found
+16bf8138308008f4b889caa827a8291ff72745b8 meson/swr: re-shuffle generated files
+bbef9474fa52d9aba06eeede52558fc5ccb762dd meson/swr: Updated copyright dates
+d7235ef83b92175537e3b538634ffcff29bf0dce meson: Don't confuse the install and search paths for dri drivers
+c75a4e5b465261e982ea31ef875325a3cc30e79d meson: Check for actual LLVM required versions
+105178db8f5d7d45b268c7664388d7db90350704 meson: fix test source name for static glapi
+c74719cf4adae2fa142e154bff56716427d3d992 glapi: fix check_table test for non-shared glapi with meson
+
+# stable: Explicit 18.0 only nominations
+2ffe395cba0f7b3c1f1c41062f4376eae3a188b5 radv: Don't expose VK_KHX_multiview on android.
+4195eed961ccfe404ae81b9112189fc93a254ded glsl/linker: check same name is not used in block and outside
+a5053ba27ed76f666e315de7150433c5aaaaf2c3 anv/device: initialize the list of enabled extensions properly
+bd6c0cab606fa0a3b821e50542ba06ff714292bf i965: perf: use drmIoctl() instead of ioctl()
+bf1577fe0972ae910c071743dc89d261a46c2926 i965/gen10: Remove warning message.
+fcae3d1a9acc080bf31cf7b5c4d0b18e67319b09 anv/gen10: Remove warning message.
+eb2e17e2d15bf58b60460437330d719131fb859e docs: Add Cannonlake support to 18.0 release notes.
+9a508b719be32ef10ca929250b7aafba313104c6 android: anv/extensions: fix generated sources build
+d448954228e69fd1b4000ea13e28c2ba2832db13 android: anv: add dependency on libnativewindow for O and later
+6451b0703ff3027b746d6268b98dd2b3e6698be5 android: vulkan/util: add dependency on libnativewindow for O and later
+c956d0f4069cf39d8d3c57ebed8d905575e9ea34 radv: make sure to emit cache flushes before starting a query
+c133a3411bbf47c2ba7d9cdae7e35a64fe276068 radv: do not set pending_reset_query in BeginCommandBuffer()
+55376cb31e2f495a4d872b4ffce2135c3365b873 st/mesa: expose 0 shader binary formats for compat profiles for Qt
+
+# stable: The commits address gen10 support which is missing in branch
+56dc9f9f49638e0769d6bc696ff7f5dafccec9fc intel/compiler: Memory fence commit must always be enabled for gen10+
+
+# stable: The commit requires earlier commits 4e7f6437b535 and a6b379284365
+#         which did not land in branch
+ab5cee4c241cb360cf67101dd751e0f38637b526 r600/compute: only mark buffer/image state dirty for fragment shaders
+
+# stable: The commits have a specific version for the 17.3 branch
+4796025ba518baa0e8893337591a3f452a375d94 intel/isl: Add an isl_color_value_is_zero helper
+85d0bec9616bc1ffa8e4ab5e7c5d12ff4e414872 anv: Be more careful about fast-clear colors
+
+# stable: The commit fixes earlier commit cd3feea74582 which did not land in
+#         branch
+4c77e21c814145e845bac64cce40eadfd7ac0bd9 anv: Move setting current_pipeline to cmd_state_init
+
+# stable: The commit is causing several regressions in Vulkan CTS tests in
+#         different platforms (hsw, bdw, bsw, ...)
+85d0bec9616bc1ffa8e4ab5e7c5d12ff4e414872 anv: Be more careful about fast-clear colors
+
+# stable: The commit requires earlier commit a03d456f5a41 which did not land in
+#         branch
+c7cadcbda47537d474eea52b9e77e57ef9287f9b r600: Take ALU_EXTENDED into account when evaluating jump offsets
+
+# fixes: The commit requires earlier commits 77097d96a0 and a5a654b19a which
+#        did not land in branch
+c7dcee58b5fe183e1653c13bff6a212f0d157b29 i965: Avoid problems from referencing orphaned BOs after growing.
+
+# fixes: The commit addresses the meson build, which is disabled in branch
+5317211fa029ee8d0e1c802ef8c01f64c470e3d5 meson: use a custom target instead of a generator for i965 oa
+d672084ba29a64f5ec8c9cd23d4b77c0efa05693 meson: define empty variables for libswdri and libswkmsdri
+8eb608df61912cfd0633fe982b140e22e7563770 meson: add libswdri and libswkmsdri to dri link_with
+7023b373ec76a2ea25b1bd0a7501276de9007047 meson: link dri3 xcb libs into vlwinsys instead of into each target
+5c460337fd9c1096dea4bc569bd876a112ed6f16 meson: Fix GL and EGL pkg-config files with glvnd
+e23192022a2cde122a6ccc70e5495fda009bee12 meson: install vulkan_intel.h header
+
+# fixes: The commit fixes earlier commit 1c57a6da5e which did not land in
+#        branch
+3401b028df1074a06a7fbc3fb1cda949646ef75d ac/shader: fix vertex input with components.
+
+# extra: The commit requires earlier commit a63c74be851 which did not land in
+#        branch
+fa8a764b62588420ac789df79ec0ab858b38639f i965: Use absolute addressing for constant buffer 0 on Kernel 4.16+.
+
+# extra: The commit requires earlier commit a44744e01d which did not land in
+#        branch
+adca1e4a92a53a403b7620c3356dcf038f0bcecc anv/image: Separate modifiers from legacy scanout
+
+# stable: The commit requires earlier commits fe81e1f9751 and 92c1290dc57 which
+#         did not land in branch
+fb5825e7ceeb16ac05f870ffe1e5a5daa09e68dd glsl: Fix memory leak with known glsl_type instances
+
+# fixes: The commits require earlier commits 2deb82207572 and b2653007b980
+#        which did not land in branch
+4f0c89d66c570e82d832e2e49227517302e271a2 ac/nir: pass the nir variable through tcs loading.
+27a5e5366e89498d98d786cc84fafbdb220c4d94 radv: mark all tess output for an indirect access.
+
+# fixes: The commit requires earlier commits b358e0e67fac and b2653007b980
+#        which did not land in branch
+8f052a3e257a61240cb311032497d016278117a8 radv: handle exporting view index to fragment shader. (v1.1)
+
+# fixes: The commit fixes earlier commits 83d4a5d5aea5a8a05be2,
+#        b2f2236dc565dd1460f0 and c62cf1f165919bc74296 which did not land in
+#        branch
+880c1718b6d14b33fe5ba918af70fea5be890c6b omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}
+
+# stable: Explicit 18.0 only nominations
+d77844a5290948a490ce6921c1623d1dd7af6c31 docs: fix 18.0 release note version
+
+# stable: Explicit 18.0 only nominations
+1866f76f7bc3ec54b4e91eb7d329b2e6f7b6277c freedreno/a5xx: fix page faults on last level
+2f175bfe5d8ca59a8a68b6d6d072cd7bf2f8baa9 freedreno/a5xx: don't align height for PIPE_BUFFER
+
+# fixes: A specific backport of this commit was applied for this branch.
+4503ff760c794c3bb15b978a47c530037d56498e ac/nir: Add workaround for GFX9 buffer views.
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
 # in the first entry.
 LIBDRM_REQUIRED=2.4.75
 LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.85
+LIBDRM_AMDGPU_REQUIRED=2.4.89
 LIBDRM_INTEL_REQUIRED=2.4.75
 LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
@@ -791,8 +791,10 @@ fi
 AC_HEADER_MAJOR
 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
+AC_CHECK_HEADERS([endian.h])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
 AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
+AC_CHECK_FUNC([memfd_create], [DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"])

 AC_MSG_CHECKING([whether strtod has locale support])
 AC_LINK_IFELSE([AC_LANG_SOURCE([[
@@ -865,10 +867,10 @@ dnl In practise that should be sufficient for all platforms, since any
 dnl platforms build with GCC and Clang support the flag.
 PTHREAD_LIBS="$PTHREAD_LIBS -pthread"

-dnl pthread-stubs is mandatory on BSD platforms, due to the nature of the
+dnl pthread-stubs is mandatory on some BSD platforms, due to the nature of the
 dnl project. Even then there's a notable issue as described in the project README
 case "$host_os" in
-linux* | cygwin* | darwin* | solaris* | *-gnu* | gnu*)
+linux* | cygwin* | darwin* | solaris* | *-gnu* | gnu* | openbsd*)
    pthread_stubs_possible="no"
    ;;
 * )
@@ -1206,10 +1208,10 @@ AC_ARG_ENABLE([xa],
    [enable_xa=no])
 AC_ARG_ENABLE([gbm],
   [AS_HELP_STRING([--enable-gbm],
-         [enable gbm library @<:@default=yes except cygwin@:>@])],
+         [enable gbm library @<:@default=yes except cygwin and macOS@:>@])],
   [enable_gbm="$enableval"],
   [case "$host_os" in
-       cygwin*)
+       cygwin* | darwin*)
          enable_gbm=no
          ;;
       *)
@@ -1383,18 +1385,10 @@ AC_ARG_ENABLE([libglvnd],
 AM_CONDITIONAL(USE_LIBGLVND, test "x$enable_libglvnd" = xyes)

 if test "x$enable_libglvnd" = xyes ; then
-    dnl XXX: update once we can handle more than libGL/glx.
-    dnl Namely: we should error out if neither of the glvnd enabled libraries
-    dnl are built
    case "x$enable_glx" in
-    xno)
-        AC_MSG_ERROR([cannot build libglvnd without GLX])
-        ;;
    xxlib | xgallium-xlib )
        AC_MSG_ERROR([cannot build libgvnd when Xlib-GLX or Gallium-Xlib-GLX is enabled])
        ;;
-    xdri)
-        ;;
    esac

    PKG_CHECK_MODULES([GLVND], libglvnd >= 0.2.0)
@@ -1403,6 +1397,10 @@ if test "x$enable_libglvnd" = xyes ; then

    DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
    DEFAULT_GL_LIB_NAME=GLX_mesa
+
+    if test "x$enable_glx" = xno -a "x$enable_egl" = xno; then
+        AC_MSG_ERROR([cannot build libglvnd without GLX or EGL])
+    fi
 fi

 AC_ARG_WITH([gl-lib-name],
@@ -1538,7 +1536,7 @@ fi
 AC_ARG_ENABLE([driglx-direct],
    [AS_HELP_STRING([--disable-driglx-direct],
        [disable direct rendering in GLX and EGL for DRI \
-            @<:@default=auto@:>@])],
+            @<:@default=enabled@:>@])],
    [driglx_direct="$enableval"],
    [driglx_direct="yes"])

@@ -2160,7 +2158,9 @@ if test "x$enable_xvmc" = xyes -o \
        "x$enable_vdpau" = xyes -o \
        "x$enable_omx_bellagio" = xyes -o \
        "x$enable_va" = xyes; then
-    PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+    if echo $platforms | grep -q "x11"; then
+        PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+    fi
    need_gallium_vl_winsys=yes
 fi
 AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)
@@ -2495,6 +2495,14 @@ if test -n "$with_gallium_drivers"; then
            HAVE_GALLIUM_RADEONSI=yes
            PKG_CHECK_MODULES([RADEON], [libdrm >= $LIBDRM_RADEON_REQUIRED libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
            PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
+
+            # Blacklist libdrm_amdgpu 2.4.90 because it causes a crash in older
+            # radeonsi with pretty much any app.
+            libdrm_version=`pkg-config libdrm_amdgpu --modversion`
+            if test "x$libdrm_version" = x2.4.90; then
+                AC_MSG_ERROR([radeonsi can't use libdrm 2.4.90 due to a compatibility issue. Use a newer or older version.])
+            fi
+
            require_libdrm "radeonsi"
            radeon_llvm_check $LLVM_REQUIRED_RADEONSI "radeonsi"
            if test "x$enable_egl" = xyes; then
@@ -2712,6 +2720,18 @@ if test "x$enable_llvm" = xyes; then
            fi
        fi
    fi
+
+    dnl The gallium-xlib GLX and gallium OSMesa targets directly embed the
+    dnl swr/llvmpipe driver into the final binary.  Adding LLVM_LIBS results in
+    dnl the LLVM library being propagated to the Libs.private of the respective
+    dnl .pc file, which ensures complete dependency information when statically
+    dnl linking.
+    if test "x$enable_glx" == xgallium-xlib; then
+        GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $LLVM_LIBS"
+    fi
+    if test "x$enable_gallium_osmesa" = xyes; then
+        OSMESA_PC_LIB_PRIV="$OSMESA_PC_LIB_PRIV $LLVM_LIBS"
+    fi
 fi

 AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes)
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -88,22 +88,40 @@ This is a work-around for that.
 <li>MESA_GL_VERSION_OVERRIDE - changes the value returned by
 glGetString(GL_VERSION) and possibly the GL API type.
 <ul>
-<li> The format should be MAJOR.MINOR[FC]
-<li> FC is an optional suffix that indicates a forward compatible context.
-This is only valid for versions &gt;= 3.0.
-<li> GL versions &lt; 3.0 are set to a compatibility (non-Core) profile
-<li> GL versions = 3.0, see below
-<li> GL versions &gt; 3.0 are set to a Core profile
-<li> Examples: 2.1, 3.0, 3.0FC, 3.1, 3.1FC
-<ul>
-<li> 2.1 - select a compatibility (non-Core) profile with GL version 2.1
-<li> 3.0 - select a compatibility (non-Core) profile with GL version 3.0
-<li> 3.0FC - select a Core+Forward Compatible profile with GL version 3.0
-<li> 3.1 - select a Core profile with GL version 3.1
-<li> 3.1FC - select a Core+Forward Compatible profile with GL version 3.1
-</ul>
-<li> Mesa may not really implement all the features of the given version.
-(for developers only)
+  <li>The format should be MAJOR.MINOR[FC|COMPAT]
+  <li>FC is an optional suffix that indicates a forward compatible
+      context. This is only valid for versions &gt;= 3.0.
+  <li>COMPAT is an optional suffix that indicates a compatibility
+      context or GL_ARB_compatibility support. This is only valid for
+      versions &gt;= 3.1.
+  <li>GL versions &lt;= 3.0 are set to a compatibility (non-Core)
+      profile
+  <li>GL version 3.1 may or may not have the ARB_compatibility
+      extension enabled, depending on the driver.
+  <li>GL versions &gt;= 3.2 are set to a Core profile
+  <li>Examples: 2.1, 3.0, 3.0FC, 3.1, 3.1FC, 3.1COMPAT, X.Y, X.YFC,
+      X.YCOMPAT.
+  <ul>
+    <li>2.1 - select a compatibility (non-Core) profile with GL
+        version 2.1.
+    <li>3.0 - select a compatibility (non-Core) profile with GL
+        version 3.0.
+    <li>3.0FC - select a Core+Forward Compatible profile with GL
+        version 3.0.
+    <li>3.1 - select GL version 3.1 with GL_ARB_compatibility enabled
+        per the driver default.
+    <li>3.1FC - select GL version 3.1 with forward compatibility and
+        GL_ARB_compatibility disabled.
+    <li>3.1COMPAT - select GL version 3.1 with GL_ARB_compatibility
+        enabled.
+    <li>X.Y - override GL version to X.Y without changing the profile.
+    <li>X.YFC - select a Core+Forward Compatible profile with GL
+        version X.Y.
+    <li>X.YCOMPAT - select a Compatibility profile with GL version
+        X.Y.
+  </ul>
+  <li>Mesa may not really implement all the features of the given
+      version. (for developers only)
 </ul>
 <li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
 glGetString(GL_VERSION) for OpenGL ES.
--- a/docs/llvmpipe.html
+++ b/docs/llvmpipe.html
@@ -20,7 +20,7 @@
 The Gallium llvmpipe driver is a software rasterizer that uses LLVM to
 do runtime code generation.
 Shaders, point/line/triangle rasterization and vertex processing are
-implemented with LLVM IR which is translated to x86 or x86-64 machine
+implemented with LLVM IR which is translated to x86, x86-64, or ppc64le machine
 code.
 Also, the driver is multithreaded to take advantage of multiple CPU cores
 (up to 8 at this time).
@@ -32,24 +32,36 @@ It's the fastest software rasterizer for Mesa.

 <ul>
 <li>
-   <p>An x86 or amd64 processor; 64-bit mode recommended.</p>
   <p>
+   For x86 or amd64 processors, 64-bit mode is recommended.
   Support for SSE2 is strongly encouraged.  Support for SSE3 and SSE4.1 will
   yield the most efficient code.  The fewer features the CPU has the more
-   likely is that you run into underperforming, buggy, or incomplete code.
+   likely it is that you will run into underperforming, buggy, or incomplete code.
+   </p>
+   <p>
+   For ppc64le processors, use of the Altivec feature (the Vector
+   Facility) is recommended if supported; use of the VSX feature (the
+   Vector-Scalar Facility) is recommended if supported AND Mesa is
+   built with LLVM version 4.0 or later.
   </p>
   <p>
   See /proc/cpuinfo to know what your CPU supports.
   </p>
 </li>
 <li>
-   <p>LLVM: version 3.4 recommended; 3.3 or later required.</p>
+   <p>Unless otherwise stated, LLVM version 3.4 is recommended; 3.3 or later is required.</p>
   <p>
   For Linux, on a recent Debian based distribution do:
   </p>
 <pre>
     aptitude install llvm-dev
 </pre>
+   <p>
+   If you want development snapshot builds of LLVM for Debian and derived
+   distributions like Ubuntu, you can use the APT repository at <a
+   href="https://apt.llvm.org/" title="Debian Development packages for LLVM"
+   >apt.llvm.org</a>, which is maintained by Debian's LLVM maintainer.
+   </p>
   <p>
   For a RPM-based distribution do:
   </p>
@@ -228,8 +240,8 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
 </ul>

 <p>
-Some of this tests can output results and benchmarks to a tab-separated-file
-for posterior analysis, e.g.:
+Some of these tests can output results and benchmarks to a tab-separated file
+for later analysis, e.g.:
 </p>
 <pre>
  build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
@@ -240,8 +252,8 @@ for posterior analysis, e.g.:

 <ul>
 <li>
-  When looking to this code by the first time start in lp_state_fs.c, and 
-  then skim through the lp_bld_* functions called in there, and the comments
+  When looking at this code for the first time, start in lp_state_fs.c, and
+  then skim through the lp_bld_* functions called there, and the comments
  at the top of the lp_bld_*.c functions.
 </li>
 <li>
--- a/docs/relnotes/17.3.0.html
+++ b/docs/relnotes/17.3.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 17.3.0 Release Notes / TBD</h1>
+<h1>Mesa 17.3.0 Release Notes / December 8, 2017</h1>

 <p>
 Mesa 17.3.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+0cb1ffe2b4637d80f08df3bdfeb300352dcffd8ff4f6711278639b084e3f07f9  mesa-17.3.0.tar.gz
+29a0a3a6c39990d491a1a58ed5c692e596b3bfc6c01d0b45e0b787116c50c6d9  mesa-17.3.0.tar.xz
 </pre>


@@ -58,14 +59,187 @@ Note: some of the new features are only available with certain drivers.
 <h2>Bug fixes</h2>

 <ul>
-TBD
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97532">Bug 97532</a> - Regression: GLB 2.7 &amp; Glmark-2 GLES versions segfault due to linker precision error (259fc505) on dead variable</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100438">Bug 100438</a> - glsl/ir.cpp:1376: ir_dereference_variable::ir_dereference_variable(ir_variable*): Assertion `var != NULL' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100613">Bug 100613</a> - Regression in Mesa 17 on s390x (zSystems)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101334">Bug 101334</a> - AMD SI cards: Some vulkan apps freeze the system</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101378">Bug 101378</a> - interpolateAtSample check for input parameter is too strict</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101655">Bug 101655</a> - Explicit sync support for android</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101691">Bug 101691</a> - gfx corruption on windowed 3d-apps running on dGPU</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101709">Bug 101709</a> - [llvmpipe] piglit gl-1.0-scissor-offscreen regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101766">Bug 101766</a> - Assertion `!&quot;invalid type&quot;' failed when constant expression involves literal of different type</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101832">Bug 101832</a> - [PATCH][regression][bisect] Xorg fails to start after f50aa21456d82c8cb6fbaa565835f1acc1720a5d</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101851">Bug 101851</a> - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101867">Bug 101867</a> - Launch options window renders black in Feral Games in current Mesa trunk</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101876">Bug 101876</a> - SIGSEGV when launching Steam</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101910">Bug 101910</a> - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101925">Bug 101925</a> - playstore/webview crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101941">Bug 101941</a> - Getting different output depending on attribute declaration order</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101961">Bug 101961</a> - Serious Sam Fusion hangs system completely</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101981">Bug 101981</a> - Commit ddc32537d6db69198e88ef0dfe19770bf9daa536 breaks rendering in multiple applications</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101982">Bug 101982</a> - Weston crashes when running an OpenGL program on i965</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101983">Bug 101983</a> - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101989">Bug 101989</a> - ES3-CTS.functional.state_query.integers.viewport_getinteger regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102006">Bug 102006</a> - gstreamer vaapih264enc segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102014">Bug 102014</a> - Mesa git build broken by commit bc7f41e11d325280db12e7b9444501357bc13922</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102015">Bug 102015</a> - [Regression,bisected]: Segfaults with various programs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102024">Bug 102024</a> - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102038">Bug 102038</a> - assertion failure in update_framebuffer_size</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102050">Bug 102050</a> - commit b4f639d02a causes build breakage on Android 32bit builds</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102052">Bug 102052</a> - No package 'expat' found</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102062">Bug 102062</a> - Segfault at eglCreateContext in android-x86</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102125">Bug 102125</a> - [softpipe] piglit arb_texture_view-targets regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102148">Bug 102148</a> - Crash when running qopenglwidget example on mesa llvmpipe win32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102177">Bug 102177</a> - [SKL] ES31-CTS.core.sepshaderobjs.StateInteraction fails sporadically</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102201">Bug 102201</a> - [regression, SI] GPU crash in Unigine Valley</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102241">Bug 102241</a> - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102274">Bug 102274</a> - assertion failure in ir_validate.cpp:240</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102308">Bug 102308</a> - segfault in glCompressedTextureSubImage3D</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102358">Bug 102358</a> - WarThunder freezes at start, with activated vsync (vblank_mode=2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102377">Bug 102377</a> - PIPE_*_4BYTE_ALIGNED_ONLY caps crashing</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102429">Bug 102429</a> - [regression, SI] Performance decrease in Unigine Valley &amp; Heaven</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102435">Bug 102435</a> - [skl,kbl] [drm] GPU HANG: ecode 9:0:0x86df7cf9, in csgo_linux64 [4947], reason: Hang on rcs, action: reset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102454">Bug 102454</a> - glibc 2.26 doesn't provide anymore xlocale.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102461">Bug 102461</a> - [llvmpipe] piglit glean fragprog1 XPD test 1 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102467">Bug 102467</a> - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102496">Bug 102496</a> - Frontbuffer rendering corruption on mesa master</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102502">Bug 102502</a> - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102530">Bug 102530</a> - [bisected] Kodi crashes when launching a stream - commit bd2662bf</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102552">Bug 102552</a> - Null dereference due to not checking return value of util_format_description</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102565">Bug 102565</a> - u_debug_stack.c:114: undefined reference to `_Ux86_64_getcontext'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102573">Bug 102573</a> - fails to build on armel</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102665">Bug 102665</a> - test_glsl_to_tgsi_lifetime.cpp:53:67: error: ‘&gt;&gt;’ should be ‘&gt; &gt;’ within a nested template argument list</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102677">Bug 102677</a> - [OpenGL CTS] KHR-GL45.CommonBugs.CommonBug_PerVertexValidation fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102680">Bug 102680</a> - [OpenGL CTS] KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102685">Bug 102685</a> - piglit.spec.glsl-1_50.compiler.vs-redeclares-pervertex-out-before-global-redeclaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102774">Bug 102774</a> - [BDW] [Bisected] Absolute constant buffers break VAAPI in mpv</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102809">Bug 102809</a> - Rust shadows(?) flash random colours</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102844">Bug 102844</a> - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102847">Bug 102847</a> - swr fail to build with llvm-5.0.0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102852">Bug 102852</a> - Scons: Support the new Scons 3.0.0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102904">Bug 102904</a> - piglit and gl45 cts linker tests regressed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102924">Bug 102924</a> - mesa (git version) images too dark</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102940">Bug 102940</a> - Regression: Vulkan KMS rendering crashes since 17.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102955">Bug 102955</a> - HyperZ related rendering issue in ARK: Survival Evolved</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102999">Bug 102999</a> - [BISECTED,REGRESSION] Failing Android EGL dEQP with RGBA configs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103002">Bug 103002</a> - string_buffer_test.cpp:43: error: ISO C++ forbids initialization of member ‘str1’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103085">Bug 103085</a> - [ivb byt hsw] piglit.spec.arb_indirect_parameters.tf-count-arrays</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103098">Bug 103098</a> - [OpenGL CTS] KHR-GL45.enhanced_layouts.varying_structure_locations fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103101">Bug 103101</a> - [SKL][bisected] DiRT Rally GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103115">Bug 103115</a> - [BSW BXT GLK] dEQP-VK.spirv_assembly.instruction.compute.sconvert.int32_to_int64</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103128">Bug 103128</a> - [softpipe] piglit fs-ldexp regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103142">Bug 103142</a> - R600g+sb: optimizer apparently stuck in an endless loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103214">Bug 103214</a> - GLES CTS functional.state_query.indexed.atomic_counter regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103227">Bug 103227</a> - [G965 G45 ILK] ES2-CTS.gtf.GL2ExtensionTests.texture_float.texture_float regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103247">Bug 103247</a> - Performance regression: car chase, manhattan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103253">Bug 103253</a> - blob.h:138:1: error: unknown type name 'ssize_t'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103265">Bug 103265</a> - [llvmpipe] piglit depth-tex-compare regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103323">Bug 103323</a> - Possible unintended error message in file pixel.c line 286</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103388">Bug 103388</a> - Linking libcltgsi.la (llvm/codegen/libclllvm_la-common.lo) fails with &quot;error: no match for 'operator-'&quot; with GCC-7, Mesa from Git and current LLVM revisions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103393">Bug 103393</a> - glDispatchComputeGroupSizeARB : gl_GlobalInvocationID.x != gl_WorkGroupID.x * gl_LocalGroupSizeARB.x + gl_LocalInvocationID.x</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103412">Bug 103412</a> - gallium/wgl: Another fix to context creation without prior SetPixelFormat()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103519">Bug 103519</a> - wayland egl apps crash on start with mesa 17.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103529">Bug 103529</a> - [GM45] GPU hang with mpv fullscreen (bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103537">Bug 103537</a> - i965: Shadow of Mordor broken since commit 379b24a40d3d34ffdaaeb1b328f50e28ecb01468 on Haswell</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103544">Bug 103544</a> - Graphical glitches r600 in game this war of mine linux native</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103616">Bug 103616</a> - Increased difference from reference image in shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103628">Bug 103628</a> - [BXT, GLK, BSW] KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103759">Bug 103759</a> - plasma desktop corrupted rendering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103787">Bug 103787</a> - [BDW,BSW] gpu hang on spec.arb_pipeline_statistics_query.arb_pipeline_statistics_query-comp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103909">Bug 103909</a> - anv_allocator.c:113:1: error: static declaration of ‘memfd_create’ follows non-static declaration</li>
+
 </ul>

 <h2>Changes</h2>

-<ul>
-TBD
-</ul>

 </div>
 </body>
--- a/docs/relnotes/17.3.1.html
+++ b/docs/relnotes/17.3.1.html
@@ -0,0 +1,191 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.1 Release Notes / December 21, 2017</h1>
+
+<p>
+Mesa 17.3.1 is a bug fix release which fixes bugs found since the 17.3.0 release.
+</p>
+<p>
+Mesa 17.3.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+b0bb0419dbe3043ed4682a28eaf95721f427ca3f23a3c2a7dc77dbe8a3b6384d  mesa-17.3.1.tar.gz
+9ae607e0998a586fb2c866cfc8e45e6f52d1c56cb1b41288253ea83eada824c1  mesa-17.3.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94739">Bug 94739</a> - Mesa 11.1.2 implementation error: bad format MESA_FORMAT_Z_FLOAT32 in _mesa_unpack_uint_24_8_depth_stencil_row</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102710">Bug 102710</a> - vkCmdBlitImage with arrayLayers &gt; 1 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103579">Bug 103579</a> - Vertex shader causes compiler to crash in SPIRV-to-NIR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103966">Bug 103966</a> - Mesa 17.2.5 implementation error: bad format MESA_FORMAT_Z_FLOAT32 in _mesa_unpack_uint_24_8_depth_stencil_row</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104119">Bug 104119</a> - radv: OpBitFieldInsert produces 0 with a loop counter for Insert</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104143">Bug 104143</a> - r600/sb: clobbers gl_Position -&gt; gl_FragCoord</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Smith (1):</p>
+<ul>
+  <li>radv: Add LLVM version to the device name string</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>spirv: Fix loading an entire block at once.</li>
+  <li>radv: Don't advertise VK_EXT_debug_report.</li>
+  <li>radv: Fix multi-layer blits.</li>
+</ul>
+
+<p>Ben Crocker (1):</p>
+<ul>
+  <li>docs/llvmpipe: document ppc64le as alternative architecture to x86.</li>
+</ul>
+
+<p>Brian Paul (2):</p>
+<ul>
+  <li>xlib: call _mesa_warning() instead of fprintf()</li>
+  <li>gallium/aux: include nr_samples in util_resource_size() computation</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>swr: Fix KNOB_MAX_WORKER_THREADS thread creation override.</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>radv: port merge tess info from anv</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.0</li>
+  <li>util: scons: wire up the sha1 test</li>
+  <li>cherry-ignore: meson: fix strtof locale support check</li>
+  <li>cherry-ignore: util: add mesa-sha1 test to meson</li>
+  <li>Update version to 17.3.1</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>broadcom/vc4: Fix handling of GFXH-515 workaround with a start vertex count.</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>compiler: use NDEBUG to guard asserts</li>
+</ul>
+
+<p>Fabian Bieler (2):</p>
+<ul>
+  <li>glsl: Match order of gl_LightSourceParameters elements.</li>
+  <li>glsl: Fix gl_NormalScale.</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>r600/sb: do not convert if-blocks that contain indirect array access</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>nir/opcodes: Fix constant-folding of bitfield_insert</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965: Switch over to fully external-or-not MOCS scheme</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>travis: disable Meson build</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>meta: Initialize depth/clear values on declaration.</li>
+  <li>meta: Fix ClearTexture with GL_DEPTH_COMPONENT.</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: move destroy command before feedback command</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>radeonsi: flush the context after resource_copy_region for buffer exports</li>
+  <li>radeonsi: allow DMABUF exports for local buffers</li>
+  <li>winsys/amdgpu: disable local BOs again due to worse performance</li>
+  <li>radeonsi: don't call force_dcc_off for buffers</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>util: Assume little endian in the absence of platform-specific handling</li>
+  <li>util: Add a SHA1 unit test program</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>radeonsi: fix the R600_RESOURCE_FLAG_UNMAPPABLE check</li>
+</ul>
+
+<p>Pierre Moreau (1):</p>
+<ul>
+  <li>nvc0/ir: Properly lower 64-bit shifts when the shift value is &gt;32</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>glsl: get correct member type when processing xfb ifc arrays</li>
+</ul>
+
+<p>Vadym Shovkoplias (2):</p>
+<ul>
+  <li>glx/dri3: Remove unused deviceName variable</li>
+  <li>util/disk_cache: Remove unneeded free() on always null string</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.3.2.html
+++ b/docs/relnotes/17.3.2.html
@@ -0,0 +1,109 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.2 Release Notes / January 8, 2018</h1>
+
+<p>
+Mesa 17.3.2 is a bug fix release which fixes bugs found since the 17.3.1 release.
+</p>
+<p>
+Mesa 17.3.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+f997e80f14c385f9a2ba827c2b74aebf1b7426712ca4a81c631ef9f78e437bf4  mesa-17.3.2.tar.gz
+e2844a13f2d6f8f24bee65804a51c42d8dc6ae9c36cff7ee61d0940e796d64c6  mesa-17.3.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97852">Bug 97852</a> - Unreal Engine corrupted preview viewport</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103801">Bug 103801</a> - [i965] &gt;Observer_ issue</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104288">Bug 104288</a> - Steamroll needs allow_glsl_cross_stage_interpolation_mismatch=true</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Fix DCC compatible formats.</li>
+</ul>
+
+<p>Brendan King (1):</p>
+<ul>
+  <li>egl: link libEGL against the dynamic version of libglapi</li>
+</ul>
+
+<p>Dave Airlie (6):</p>
+<ul>
+  <li>radv/gfx9: add support for 3d images to blit 2d paths</li>
+  <li>radv: handle depth/stencil image copy with layouts better. (v3.1)</li>
+  <li>radv/meta: fix blit paths for depth/stencil (v2.1)</li>
+  <li>radv: fix issue with multisample positions and interp_var_at_sample.</li>
+  <li>radv/gfx9: add 3d sampler image-&gt;buffer copy shader. (v3)</li>
+  <li>radv: don't do format replacement on tc compat htile surfaces.</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.1</li>
+  <li>Update version to 17.3.2</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>egl: let each platform decided how to handle LIBGL_ALWAYS_SOFTWARE</li>
+</ul>
+
+<p>Rob Herring (1):</p>
+<ul>
+  <li>egl/android: Fix build break with dri2_initialize_android _EGLDisplay parameter</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>radv/gfx9: fix primitive topology when adjacency is used</li>
+  <li>radv: use a faster version for nir_op_pack_half_2x16</li>
+</ul>
+
+<p>Tapani Pälli (2):</p>
+<ul>
+  <li>mesa: add AllowGLSLCrossStageInterpolationMismatch workaround</li>
+  <li>drirc: set allow_glsl_cross_stage_interpolation_mismatch for more games</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.3.3.html
+++ b/docs/relnotes/17.3.3.html
@@ -0,0 +1,151 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.3 Release Notes / January 18, 2018</h1>
+
+<p>
+Mesa 17.3.3 is a bug fix release which fixes bugs found since the 17.3.2 release.
+</p>
+<p>
+Mesa 17.3.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c733d37a161501cd81dc9b309ccb613753b98eafc6d35e0847548a6642749772  mesa-17.3.3.tar.gz
+41bac5de0ef6adc1f41a1ec0f80c19e361298ce02fa81b5f9ba4fdca33a9379b  mesa-17.3.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104214">Bug 104214</a> - Dota crashes when switching from game to desktop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104492">Bug 104492</a> - Compute Shader: Wrong alignment when assigning struct value to structured SSBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104551">Bug 104551</a> - Check if Mako templates for Python are installed</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Smith (3):</p>
+<ul>
+  <li>anv: Add missing unlock in anv_scratch_pool_alloc</li>
+  <li>anv: Take write mask into account in has_color_buffer_write_enabled</li>
+  <li>anv: Make sure state on primary is correct after CmdExecuteCommands</li>
+</ul>
+
+<p>Andres Gomez (1):</p>
+<ul>
+  <li>anv: Import mako templates only during execution of anv_extensions</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (11):</p>
+<ul>
+  <li>radv: Invert condition for all samples identical during resolve.</li>
+  <li>radv: Flush caches before subpass resolve.</li>
+  <li>radv: Fix fragment resolve destination offset.</li>
+  <li>radv: Use correct framebuffer size for partial FS resolves.</li>
+  <li>radv: Always use fragment resolve if dest uses DCC.</li>
+  <li>Revert "radv/gfx9: fix block compression texture views."</li>
+  <li>radv: Use correct HTILE expanded words.</li>
+  <li>radv: Allow writing 0 scissors.</li>
+  <li>ac/nir: Handle loading data from compact arrays.</li>
+  <li>radv: Invalidate L1 for VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT.</li>
+  <li>ac/nir: Sanitize location_frac for local variables.</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>radv: fix events on compute queues.</li>
+  <li>radv: fix pipeline statistics end query on compute queue</li>
+  <li>radv/gfx9: fix 3d image to image transfers on compute queues.</li>
+  <li>radv/gfx9: fix 3d image clears on compute queues</li>
+  <li>radv/gfx9: fix buffer to image for 3d images on compute queues</li>
+  <li>radv/gfx9: fix block compression texture views.</li>
+  <li>radv/gfx9: use a bigger hammer to flush cb/db caches.</li>
+  <li>radv/gfx9: use correct swizzle parameter to work out border swizzle.</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.2</li>
+</ul>
+
+<p>Florian Will (1):</p>
+<ul>
+  <li>glsl: Respect std430 layout in lower_buffer_access</li>
+</ul>
+
+<p>Juan A. Suarez Romero (6):</p>
+<ul>
+  <li>cherry-ignore: intel/fs: Use the original destination region for int MUL lowering</li>
+  <li>cherry-ignore: i965/fs: Use UW types when using V immediates</li>
+  <li>cherry-ignore: main: Clear shader program data whenever ProgramBinary is called</li>
+  <li>cherry-ignore: egl: pass the dri2_dpy to the $plat_teardown functions</li>
+  <li>cherry-ignore: vulkan/wsi: free cmd pools</li>
+  <li>Update version to 17.3.3</li>
+</ul>
+
+<p>Józef Kucia (1):</p>
+<ul>
+  <li>radeonsi: fix alpha-to-coverage if color writes are disabled</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Require space for MI_BATCHBUFFER_END.</li>
+  <li>i965: Torch public intel_batchbuffer_emit_dword/float helpers.</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>etnaviv: disable in-place resolve for non-supertiled surfaces</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (1):</p>
+<ul>
+  <li>anv: VkDescriptorSetLayoutBinding can have descriptorCount == 0</li>
+</ul>
+
+<p>Thomas Hellstrom (1):</p>
+<ul>
+  <li>loader/dri3: Avoid freeing renderbuffers in use</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>swr/rast: fix invalid sign masks in avx512 simdlib code</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.3.4.html
+++ b/docs/relnotes/17.3.4.html
@@ -0,0 +1,275 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.4 Release Notes / January 15, 2018</h1>
+
+<p>
+Mesa 17.3.4 is a bug fix release which fixes bugs found since the 17.3.3 release.
+</p>
+<p>
+Mesa 17.3.4 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+2d3a4c3cbc995b3e192361dce710d8c749e046e7575aa1b7d8fc9e6b4df28f84  mesa-17.3.4.tar.gz
+71f995e233bc5df1a0dd46c980d1720106e7f82f02d61c1ca50854b5e02590d0  mesa-17.3.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90311">Bug 90311</a> - Fail to build libglx with clang at linking stage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101442">Bug 101442</a> - Piglit shaders&#64;ssa&#64;fs-if-def-else-break fails with sb but passes with R600_DEBUG=nosb</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102435">Bug 102435</a> - [skl,kbl] [drm] GPU HANG: ecode 9:0:0x86df7cf9, in csgo_linux64 [4947], reason: Hang on rcs, action: reset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103006">Bug 103006</a> - [OpenGL CTS] [HSW] KHR-GL45.vertex_attrib_binding.basic-inputL-case1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103626">Bug 103626</a> - [SNB] ES3-CTS.functional.shaders.precision</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104163">Bug 104163</a> - [GEN9+] 2-3% perf drop in GfxBench Manhattan 3.1 from &quot;i965: Disable regular fast-clears (CCS_D) on gen9+&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104383">Bug 104383</a> - [KBL] Intel GPU hang with firefox</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104411">Bug 104411</a> - [CCS] lemonbar-xft GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104487">Bug 104487</a> - [KBL] portal2_linux GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104711">Bug 104711</a> - [skl CCS] Oxenfree (unity engine game) hangs GPU</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104741">Bug 104741</a> - Graphic corruption for Android apps Telegram and KineMaster</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104745">Bug 104745</a> - HEVC VDPAU decoding broken on RX 460 with UVD Firmware v1.130</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104818">Bug 104818</a> - mesa fails to build on ia64</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (1):</p>
+<ul>
+  <li>i965: perform 2 uploads with dual slot *64*PASSTHRU formats on gen&lt;8</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (10):</p>
+<ul>
+  <li>radv: Fix ordering issue in meta memory allocation failure path.</li>
+  <li>radv: Fix memory allocation failure path in compute resolve init.</li>
+  <li>radv: Fix freeing meta state if the device pipeline cache fails to allocate.</li>
+  <li>radv: Fix fragment resolve init memory allocation failure paths.</li>
+  <li>radv: Fix bufimage failure deallocation.</li>
+  <li>radv: Init variant entry with memset.</li>
+  <li>radv: Don't allow 3d or 1d depth/stencil textures.</li>
+  <li>ac/nir: Use instance_rate_inputs per attribute, not per variable.</li>
+  <li>ac/nir: Use correct 32-bit component writemask for 64-bit SSBO stores.</li>
+  <li>ac/nir: Fix vector extraction if source vector has &gt;4 elements.</li>
+</ul>
+
+<p>Boyuan Zhang (2):</p>
+<ul>
+  <li>radeon/vcn: add and manage render picture list</li>
+  <li>radeon/uvd: add and manage render picture list</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>configure.ac: add missing llvm dependencies to .pc files</li>
+</ul>
+
+<p>Dave Airlie (10):</p>
+<ul>
+  <li>r600/sb: fix a bug emitting ar load from a constant.</li>
+  <li>ac/nir: account for view index in the user sgpr allocation.</li>
+  <li>radv: add fs_key meta format support to resolve passes.</li>
+  <li>radv: don't use hw resolve for integer image formats</li>
+  <li>radv: don't use hw resolves for r16g16 norm formats.</li>
+  <li>radv: move spi_baryc_cntl to pipeline</li>
+  <li>r600/sb: insert the else clause when we might depart from a loop</li>
+  <li>radv: don't enable tc compat for d32s8 + 4/8 samples (v1.1)</li>
+  <li>radv/gfx9: fix block compression texture views. (v2)</li>
+  <li>virgl: also remove dimension on indirect.</li>
+</ul>
+
+<p>Eleni Maria Stea (1):</p>
+<ul>
+  <li>mesa: Fix function pointers initialization in status tracker</li>
+</ul>
+
+<p>Emil Velikov (18):</p>
+<ul>
+  <li>cherry-ignore: i965: Accept CONTEXT_ATTRIB_PRIORITY for brwCreateContext</li>
+  <li>cherry-ignore: swr: refactor swr_create_screen to allow for proper cleanup on error</li>
+  <li>cherry-ignore: anv: add explicit 18.0 only nominations</li>
+  <li>cherry-ignore: radv: fix sample_mask_in loading. (v3.1)</li>
+  <li>cherry-ignore: meson: multiple fixes</li>
+  <li>cherry-ignore: swr/rast: support llvm 3.9 type declarations</li>
+  <li>Revert "cherry-ignore: intel/fs: Use the original destination region for int MUL lowering"</li>
+  <li>cherry-ignore: ac/nir: set amdgpu.uniform and invariant.load for UBOs</li>
+  <li>cherry-ignore: add gen10 fixes</li>
+  <li>cherry-ignore: add r600/amdgpu 18.0 nominations</li>
+  <li>cherry-ignore: add i965 shader cache fixes</li>
+  <li>cherry-ignore: nir: mark unused space in packed_tex_data</li>
+  <li>radv: Stop advertising VK_KHX_multiview</li>
+  <li>cherry-ignore: radv: Don't expose VK_KHX_multiview on android.</li>
+  <li>configure.ac: correct driglx-direct help text</li>
+  <li>cherry-ignore: add meson fix</li>
+  <li>cherry-ignore: add a few more meson fixes</li>
+  <li>Update version to 17.3.4</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>radeon: remove left over dead code</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>r600/shader: Initialize max_driver_temp_used correctly for the first time</li>
+</ul>
+
+<p>Grazvydas Ignotas (2):</p>
+<ul>
+  <li>st/va: release held locks in error paths</li>
+  <li>st/vdpau: release held lock in error path</li>
+</ul>
+
+<p>Igor Gnatenko (1):</p>
+<ul>
+  <li>link mesautil with pthreads</li>
+</ul>
+
+<p>Indrajit Das (4):</p>
+<ul>
+  <li>st/omx_bellagio: Update default intra matrix per MPEG2 spec</li>
+  <li>radeon/uvd: update quantiser matrices only when requested</li>
+  <li>radeon/vcn: update quantiser matrices only when requested</li>
+  <li>st/va: clear pointers for mpeg2 quantiser matrices</li>
+</ul>
+
+<p>Jason Ekstrand (19):</p>
+<ul>
+  <li>i965: Call brw_cache_flush_for_render in predraw_resolve_framebuffer</li>
+  <li>i965: Add more precise cache tracking helpers</li>
+  <li>i965/blorp: Add more destination flushing</li>
+  <li>i965: Track the depth and render caches separately</li>
+  <li>i965: Track format and aux usage in the render cache</li>
+  <li>Re-enable regular fast-clears (CCS_D) on gen9+</li>
+  <li>i965/miptree: Refactor CCS_E and CCS_D cases in render_aux_usage</li>
+  <li>i965/miptree: Add an explicit tiling parameter to create_for_bo</li>
+  <li>i965/miptree: Use the tiling from the modifier instead of the BO</li>
+  <li>i965/bufmgr: Add a create_from_prime_tiled function</li>
+  <li>i965: Set tiling on BOs imported with modifiers</li>
+  <li>i965/miptree: Take an aux_usage in prepare/finish_render</li>
+  <li>i965/miptree: Add an aux_disabled parameter to render_aux_usage</li>
+  <li>i965/surface_state: Drop brw_aux_surface_disabled</li>
+  <li>intel/fs: Use the original destination region for int MUL lowering</li>
+  <li>anv/pipeline: Don't look at blend state unless we have an attachment</li>
+  <li>anv/cmd_buffer: Re-emit the pipeline at every subpass</li>
+  <li>anv: Stop advertising VK_KHX_multiview</li>
+  <li>i965: Call prepare_external after implicit window-system MSAA resolves</li>
+</ul>
+
+<p>Jon Turney (3):</p>
+<ul>
+  <li>configure: Default to gbm=no on osx</li>
+  <li>glx/apple: include util/debug.h for env_var_as_boolean prototype</li>
+  <li>glx/apple: locate dispatch table functions to wrap by name</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>svga: Prevent use after free.</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.3</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Bind null render targets for shadow sampling + color.</li>
+  <li>i965: Bump official kernel requirement to Linux v3.9.</li>
+</ul>
+
+<p>Lucas Stach (2):</p>
+<ul>
+  <li>etnaviv: dirty TS state when framebuffer has changed</li>
+  <li>renderonly: fix dumb BO allocation for non 32bpp formats</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: don't ignore pitch for imported textures</li>
+</ul>
+
+<p>Matthew Nicholls (2):</p>
+<ul>
+  <li>radv: restore previous stencil reference after depth-stencil clear</li>
+  <li>radv: remove predication on cache flushes</li>
+</ul>
+
+<p>Maxin B. John (1):</p>
+<ul>
+  <li>anv_icd.py: improve reproducible builds</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>winsys/radeon: Compute is_displayable in surf_drm_to_winsys</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>r600: don't do stack workarounds for hemlock</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: create pipeline layout objects for all meta operations</li>
+</ul>
+
+<p>Samuel Thibault (1):</p>
+<ul>
+  <li>glx: fix non-dri build</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>ac: fix buffer overflow bug in 64bit SSBO loads</li>
+  <li>ac: fix visit_ssa_undef() for doubles</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.3.5.html
+++ b/docs/relnotes/17.3.5.html
@@ -0,0 +1,66 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.5 Release Notes / February 19, 2018</h1>
+
+<p>
+Mesa 17.3.5 is a bug fix release which fixes bugs found since the 17.3.4 release.
+</p>
+<p>
+Mesa 17.3.5 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+bc1ee20366aae2affc37c89228f871f438136f70252005e9f842169bde976788  mesa-17.3.5.tar.gz
+eb9228fc8aaa71e0205c1481c5b157752ebaec9b646b030d27478e25a6d7936a  mesa-17.3.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>None</p>
+
+
+
+
+<h2>Changes</h2>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.4</li>
+  <li>Update version to 17.3.5</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>ac/nir: Fix conflict resolution typo in handle_vs_input_decl</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.3.6.html
+++ b/docs/relnotes/17.3.6.html
@@ -0,0 +1,85 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.6 Release Notes / February 19, 2018</h1>
+
+<p>
+Mesa 17.3.6 is a bug fix release which fixes bugs found since the 17.3.5 release.
+</p>
+<p>
+Mesa 17.3.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+d5e10ea3f0d11b06d2b0b235bba372a04278c39bc0e712090bda1f61842db188  mesa-17.3.6.tar.gz
+e5915680d44ac9d05defdec529db7459ac9edd441c9845266eff2e2d3e57fbf8  mesa-17.3.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104383">Bug 104383</a> - [KBL] Intel GPU hang with firefox</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104411">Bug 104411</a> - [CCS] lemonbar-xft GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104546">Bug 104546</a> - Crash happens when running compute pipeline after calling glxMakeCurrent two times</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.5</li>
+  <li>Update version to 17.3.6</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>i965/draw: Do resolves properly for textures used by TXF</li>
+  <li>i965: Replace draw_aux_buffer_disabled with draw_aux_usage</li>
+  <li>i965/draw: Set NEW_AUX_STATE when draw aux changes</li>
+  <li>i965: Stop disabling aux during texture preparation</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Don't disable CCS for RT dependencies when dispatching compute.</li>
+</ul>
+
+<p>Topi Pohjolainen (1):</p>
+<ul>
+  <li>i965: Don't try to disable render aux buffers for compute</li>
+</ul>
+
+</div>
+</body>
+</html>
+
--- a/docs/relnotes/17.3.7.html
+++ b/docs/relnotes/17.3.7.html
@@ -0,0 +1,312 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.7 Release Notes / March 21, 2018</h1>
+
+<p>
+Mesa 17.3.7 is a bug fix release which fixes bugs found since the 17.3.6 release.
+</p>
+<p>
+Mesa 17.3.7 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+f08de6d0ccb3dbca04b44790d85c3ff9e7b1cc4189d1b7c7167e5ba7d98736c0  mesa-17.3.7.tar.gz
+0595904a8fba65a8fe853a84ad3c940205503b94af41e8ceed245fada777ac1e  mesa-17.3.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103007">Bug 103007</a> - [OpenGL CTS] [HSW] KHR-GL45.gpu_shader_fp64.fp64.max_uniform_components fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103988">Bug 103988</a> - Intermittent piglit failures with shader cache enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104302">Bug 104302</a> - Wolfenstein 2 (2017) under wine graphical artifacting on RADV</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104381">Bug 104381</a> - swr fails to build since llvm-svn r321257</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104625">Bug 104625</a> - semicolon after if</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104642">Bug 104642</a> - Android: NULL pointer dereference with i965 mesa-dev, seems build_id_length related</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104654">Bug 104654</a> - r600/sb: Alien Isolation GPU lock</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104905">Bug 104905</a> - SpvOpFOrdEqual doesn't return correct results for NaNs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104915">Bug 104915</a> - Indexed SHADING_LANGUAGE_VERSION query not supported</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104923">Bug 104923</a> - anv: Dota2 rendering corruption</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105013">Bug 105013</a> - [regression] GLX+VA-API+clutter-gst video playback is corrupt with Mesa 17.3 (but is fine with 17.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105029">Bug 105029</a> - simdlib_512_avx512.inl:371:57: error: could not convert ‘_mm512_mask_blend_epi32((__mmask16)(ImmT), a, b)’ from ‘__m512i’ {aka ‘__vector(8) long long int’} to ‘SIMDImpl::SIMD512Impl::Float’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105098">Bug 105098</a> - [RADV] GPU freeze with simple Vulkan App</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105103">Bug 105103</a> - Wayland master causes Mesa to fail to compile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105224">Bug 105224</a> - Webgl Pointclouds flickers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105255">Bug 105255</a> - Waiting for fences without waitAll is not implemented</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105271">Bug 105271</a> - WebGL2 shader crashes i965_dri.so 17.3.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105436">Bug 105436</a> - Blinking textures in UT2004 [bisected]</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Smith (1):</p>
+<ul>
+  <li>radv: Fix CmdCopyImage between uncompressed and compressed images</li>
+</ul>
+
+<p>Andriy Khulap (1):</p>
+<ul>
+  <li>i965: Fix RELOC_WRITE typo in brw_store_data_imm64()</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>isl: Don't use surface format R32_FLOAT for typed atomic integer operations</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (6):</p>
+<ul>
+  <li>radv: Always lower indirect derefs after nir_lower_global_vars_to_local.</li>
+  <li>radeonsi: Export signalled sync file instead of -1.</li>
+  <li>radv: Implement WaitForFences with !waitAll.</li>
+  <li>radv: Implement waiting on non-submitted fences.</li>
+  <li>radv: Fix copying from 3D images starting at non-zero depth.</li>
+  <li>radv: Increase the number of dynamic uniform buffers.</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>mesa: add missing switch case for EXTRA_VERSION_40 in check_extra()</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>glx: Properly handle cases where screen creation fails</li>
+</ul>
+
+<p>Daniel Stone (3):</p>
+<ul>
+  <li>i965: Fix bugs in intel_from_planar</li>
+  <li>egl/wayland: Fix ARGB/XRGB transposition in config map</li>
+  <li>egl/wayland: Always use in-tree wayland-egl-backend.h</li>
+</ul>
+
+<p>Dave Airlie (9):</p>
+<ul>
+  <li>r600: fix cubemap arrays</li>
+  <li>r600/sb/cayman: fix indirect ubo access on cayman</li>
+  <li>r600: fix xfb stream check.</li>
+  <li>ac/nir: to integer the args to bcsel.</li>
+  <li>r600/cayman: fix fragcood loading recip generation.</li>
+  <li>radv: don't support tc-compat on multisample d32s8 at all.</li>
+  <li>virgl: remap query types to hw support.</li>
+  <li>ac/nir: don't apply slice rounding on txf_ms</li>
+  <li>r600: implement callstack workaround for evergreen.</li>
+</ul>
+
+<p>Dylan Baker (2):</p>
+<ul>
+  <li>glapi/check_table: Remove 'extern "C"' block</li>
+  <li>glapi: remove APPLE extensions from test</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.6</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>mesa: Drop incorrect A4B4G4R4 _mesa_format_matches_format_and_type() cases.</li>
+  <li>ac/nir: Fix compiler warning about uninitialized dw_addr.</li>
+  <li>glsl/tests: Fix strict aliasing warning about int64/double.</li>
+  <li>glsl/tests: Fix a compiler warning about signed/unsigned loop comparison.</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>i965: Fix KHR_blend_equation_advanced with some render targets.</li>
+</ul>
+
+<p>Frank Binns (1):</p>
+<ul>
+  <li>egl/dri2: fix segfault when display initialisation fails</li>
+</ul>
+
+<p>George Kyriazis (1):</p>
+<ul>
+  <li>swr/rast: blend_epi32() should return Integer, not Float</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>r600: Take ALU_EXTENDED into account when evaluating jump offsets</li>
+</ul>
+
+<p>Gurchetan Singh (1):</p>
+<ul>
+  <li>mesa: don't clamp just based on ARB_viewport_array extension</li>
+</ul>
+
+<p>Iago Toral Quiroga (2):</p>
+<ul>
+  <li>i965/sbe: fix number of inputs for active components</li>
+  <li>i965/vec4: use a temp register to compute offsets for pull loads</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>radv: Really use correct HTILE expanded words.</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>intel/isl: Add an isl_color_value_is_zero helper</li>
+  <li>vulkan/wsi/x11: Set OUT_OF_DATE if wait_for_special_event fails</li>
+  <li>intel/fs: Set up sampler message headers in the visitor on gen7+</li>
+</ul>
+
+<p>Jonathan Gray (1):</p>
+<ul>
+  <li>configure.ac: pthread-stubs not present on OpenBSD</li>
+</ul>
+
+<p>Jordan Justen (3):</p>
+<ul>
+  <li>i965: Create new program cache bo when clearing the program cache</li>
+  <li>program: Don't reset SamplersValidated when restoring from shader cache</li>
+  <li>intel/vulkan: Hard code CS scratch_ids_per_subslice for Cherryview</li>
+</ul>
+
+<p>Juan A. Suarez Romero (14):</p>
+<ul>
+  <li>cherry-ignore: Explicit 18.0 only nominations</li>
+  <li>cherry-ignore: r600/compute: only mark buffer/image state dirty for fragment shaders</li>
+  <li>cherry-ignore: anv: Move setting current_pipeline to cmd_state_init</li>
+  <li>cherry-ignore: anv: Be more careful about fast-clear colors</li>
+  <li>cherry-ignore: Add patches that has a specific version for 17.3</li>
+  <li>cherry-ignore: r600: Take ALU_EXTENDED into account when evaluating jump offsets</li>
+  <li>cherry-ignore: intel/compiler: Memory fence commit must always be enabled for gen10+</li>
+  <li>cherry-ignore: i965: Avoid problems from referencing orphaned BOs after growing.</li>
+  <li>cherry-ignore: include all Meson related fixes</li>
+  <li>cherry-ignore: ac/shader: fix vertex input with components.</li>
+  <li>cherry-ignore: i965: Use absolute addressing for constant buffer 0 on Kernel 4.16+.</li>
+  <li>cherry-ignore: anv/image: Separate modifiers from legacy scanout</li>
+  <li>cherry-ignore: glsl: Fix memory leak with known glsl_type instances</li>
+  <li>Update version to 17.3.7</li>
+</ul>
+
+<p>Karol Herbst (1):</p>
+<ul>
+  <li>nvir/nvc0: fix legalizing of ld unlock c0[0x10000]</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Emit CS stall before MEDIA_VFE_STATE.</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>i965: perf: ensure reading config IDs from sysfs isn't interrupted</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>radeonsi: align command buffer starting address to fix some Raven hangs</li>
+  <li>configure.ac: blacklist libdrm 2.4.90</li>
+</ul>
+
+<p>Michal Navratil (1):</p>
+<ul>
+  <li>winsys/amdgpu: allow non page-aligned size bo creation from pointer</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (1):</p>
+<ul>
+  <li>glsl/linker: fix bug when checking precision qualifier</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>ac/nir: use ordered float comparisons except for not equal</li>
+  <li>Revert "mesa: do not trigger _NEW_TEXTURE_STATE in glActiveTexture()"</li>
+</ul>
+
+<p>Stephan Gerhold (1):</p>
+<ul>
+  <li>util/build-id: Fix address comparison for binaries with LOAD vaddr &gt; 0</li>
+</ul>
+
+<p>Thomas Hellstrom (2):</p>
+<ul>
+  <li>svga: Fix a leftover debug hack</li>
+  <li>loader_dri3/glx/egl: Reinstate the loader_dri3_vtable get_dri_screen callback</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>swr/rast: fix MemoryBuffer build break for llvm-6</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>nir: fix interger divide by zero crash during constant folding</li>
+</ul>
+
+<p>Tobias Droste (1):</p>
+<ul>
+  <li>gallivm: Use new LLVM fast-math-flags API</li>
+</ul>
+
+<p>Vadym Shovkoplias (1):</p>
+<ul>
+  <li>mesa: add glsl version query (v4)</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>swr/rast: Fix macOS macro.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
+
--- a/docs/relnotes/17.3.8.html
+++ b/docs/relnotes/17.3.8.html
@@ -0,0 +1,147 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.8 Release Notes / April 03, 2018</h1>
+
+<p>
+Mesa 17.3.8 is a bug fix release which fixes bugs found since the 17.3.7 release.
+</p>
+<p>
+Mesa 17.3.8 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+175d2ca9be2af3a8db6cd603986096d75da70f59699528d7b6675d542a305e23  mesa-17.3.8.tar.gz
+8f9d9bf281c48e4a8f5228816577263b4c655248dc7666e75034ab422951a6b1  mesa-17.3.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102542">Bug 102542</a> - mesa-17.2.0/src/gallium/state_trackers/nine/nine_ff.c:1938: bad assignment ?</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103746">Bug 103746</a> - [BDW BSW SKL KBL] dEQP-GLES31.functional.copy_image regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104636">Bug 104636</a> - [BSW/HD400] Aztec Ruins GL version GPU hangs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105290">Bug 105290</a> - [BSW/HD400] SynMark OglCSDof GPU hangs when shaders come from cache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105464">Bug 105464</a> - Reading per-patch outputs in Tessellation Control Shader returns undefined values</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105670">Bug 105670</a> - [regression][hang] Trine1EE hangs GPU after loading screen on Mesa3D-17.3 and later</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105704">Bug 105704</a> - compiler assertion hit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105717">Bug 105717</a> - [bisected] Mesa build tests fails: BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Axel Davy (3):</p>
+<ul>
+  <li>st/nine: Fix bad tracking of vs textures for NINESBT_ALL</li>
+  <li>st/nine: Fixes warning about implicit conversion</li>
+  <li>st/nine: Fix non inversible matrix check</li>
+</ul>
+
+<p>Caio Marcelo de Oliveira Filho (1):</p>
+<ul>
+  <li>anv/pipeline: fail if TCS/TES compile fail</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>radv: get correct offset into LDS for indexed vars.</li>
+</ul>
+
+<p>Derek Foreman (1):</p>
+<ul>
+  <li>egl/wayland: Make swrast display_sync the correct queue</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>meson/configure: detect endian.h instead of trying to guess when it's available</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Don't write to user buffer in glGetTexParameterIuiv on error</li>
+  <li>i965/vec4: Fix null destination register in 3-source instructions</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965: Emit texture cache invalidates around blorp_copy</li>
+</ul>
+
+<p>Jordan Justen (2):</p>
+<ul>
+  <li>i965: Calculate thread_count in brw_alloc_stage_scratch</li>
+  <li>i965: Hard code CS scratch_ids_per_subslice for Cherryview</li>
+</ul>
+
+<p>Juan A. Suarez Romero (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.7</li>
+  <li>cherry-ignore: ac/nir: pass the nir variable through tcs loading.</li>
+  <li>cherry-ignore: radv: handle exporting view index to fragment shader. (v1.1)</li>
+  <li>cherry-ignore: omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}</li>
+  <li>cherry-ignore: docs: fix 18.0 release note version</li>
+  <li>Update version to 17.3.8</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: move feedback command inside of destroy function</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/dri: fix OpenGL-OpenCL interop for GL_TEXTURE_BUFFER</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>nir: fix per_vertex_output intrinsic</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>glsl: fix infinite loop caused by bug in loop unrolling pass</li>
+  <li>nir: fix crash in loop unroll corner case</li>
+</ul>
+
+
+</div>
+</body>
+</html>
+
--- a/docs/relnotes/17.3.9.html
+++ b/docs/relnotes/17.3.9.html
@@ -0,0 +1,162 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.3.9 Release Notes / April 18, 2018</h1>
+
+<p>
+Mesa 17.3.9 is a bug fix release which fixes bugs found since the 17.3.8 release.
+</p>
+<p>
+Mesa 17.3.9 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4d625f65a1ff4cd8cfeb39e38f047507c6dea047502a0d53113c96f54588f340  mesa-17.3.9.tar.gz
+c5beb5fc05f0e0c294fefe1a393ee118cb67e27a4dca417d77c297f7d4b6e479  mesa-17.3.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98281">Bug 98281</a> - 'message's in ctx-&gt;Debug.LogMessages[] seem to leak.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101408">Bug 101408</a> - [Gen8+] Xonotic fails to render one of the weapons</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102342">Bug 102342</a> - mesa-17.1.7/src/gallium/auxiliary/pipebuffer/pb_cache.c:169]: (style) Suspicious condition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105317">Bug 105317</a> - The GPU Vega 56 was hang while try to pass #GraphicsFuzz shader15 test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105440">Bug 105440</a> - GEN7: rendering issue on citra</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105442">Bug 105442</a> - Hang when running nine ff lighting shader with radeonsi</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105994">Bug 105994</a> - surface state leak when creating and destroying image views with aspectMask depth and stencil</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>dri_util: when overriding, always reset the core version</li>
+  <li>mesa: adds some comments regarding MESA_GLES_VERSION_OVERRIDE usage</li>
+</ul>
+
+<p>Axel Davy (2):</p>
+<ul>
+  <li>st/nine: Declare lighting consts for ff shaders</li>
+  <li>st/nine: Do not use scratch for face register</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>ac/nir: Add workaround for GFX9 buffer views.</li>
+</ul>
+
+<p>Daniel Stone (1):</p>
+<ul>
+  <li>st/dri: Initialise modifier to INVALID for DRI2</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>glsl: remove unreachable assert()</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>gbm: remove never-implemented function</li>
+</ul>
+
+<p>Henri Verbeet (1):</p>
+<ul>
+  <li>mesa: Inherit texture view multi-sample information from the original texture images.</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>compiler/spirv: set is_shadow for depth comparitor sampling opcodes</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>nir/vars_to_ssa: Remove copies from the correct set</li>
+  <li>nir/lower_indirect_derefs: Support interp_var_at intrinsics</li>
+  <li>intel/vec4: Set channel_sizes for MOV_INDIRECT sources</li>
+  <li>nir/lower_vec_to_movs: Only coalesce if the vec had a SSA destination</li>
+</ul>
+
+<p>Juan A. Suarez Romero (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.3.8</li>
+  <li>cherry-ignore: Explicit 18.0 only nominations</li>
+  <li>Update version to 17.3.9</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix number of planes for depth &amp; stencil</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>mesa: simplify MESA_GL_VERSION_OVERRIDE behavior of API override</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: fix picking the method for resolve subpass</li>
+</ul>
+
+<p>Sergii Romantsov (1):</p>
+<ul>
+  <li>i965: Extend the negative 32-bit deltas to 64-bits</li>
+</ul>
+
+<p>Timothy Arceri (6):</p>
+<ul>
+  <li>gallium/pipebuffer: fix parenthesis location</li>
+  <li>glsl: always call do_lower_jumps() after loop unrolling</li>
+  <li>ac: add if/loop build helpers</li>
+  <li>radeonsi: make use of if/loop build helpers in ac</li>
+  <li>ac: make use of if/loop build helpers</li>
+  <li>mesa: free debug messages when destroying the debug state</li>
+</ul>
+
+<p>Xiong, James (1):</p>
+<ul>
+  <li>i965: return the fourcc saved in __DRIimage when possible</li>
+</ul>
+
+
+</div>
+</body>
+</html>
+
--- a/docs/specs/enums.txt
+++ b/docs/specs/enums.txt
@@ -46,14 +46,14 @@ GL_MESA_shader_debug.spec: (obsolete)
        GL_DEBUG_ASSERT_MESA             0x875B

 GL_MESA_program_debug: (obsolete)
-	GL_FRAGMENT_PROGRAM_CALLBACK_MESA      0x????
-	GL_VERTEX_PROGRAM_CALLBACK_MESA        0x????
-	GL_FRAGMENT_PROGRAM_POSITION_MESA      0x????
-	GL_VERTEX_PROGRAM_POSITION_MESA        0x????
-	GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA 0x????
-	GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA 0x????
-	GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA   0x????
-	GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA   0x????
+        GL_FRAGMENT_PROGRAM_POSITION_MESA       0x8BB0
+        GL_FRAGMENT_PROGRAM_CALLBACK_MESA       0x8BB1
+        GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA  0x8BB2
+        GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA  0x8BB3
+        GL_VERTEX_PROGRAM_POSITION_MESA         0x8BB4
+        GL_VERTEX_PROGRAM_CALLBACK_MESA         0x8BB5
+        GL_VERTEX_PROGRAM_CALLBACK_FUNC_MESA    0x8BB6
+        GL_VERTEX_PROGRAM_CALLBACK_DATA_MESA    0x8BB7

 GL_MESAX_texture_stack:
 	GL_TEXTURE_1D_STACK_MESAX            0x8759
@@ -63,16 +63,6 @@ GL_MESAX_texture_stack:
 	GL_TEXTURE_1D_STACK_BINDING_MESAX    0x875D
 	GL_TEXTURE_2D_STACK_BINDING_MESAX    0x875E

-GL_MESA_program_debug
-	GL_FRAGMENT_PROGRAM_POSITION_MESA	0x8BB0
-	GL_FRAGMENT_PROGRAM_CALLBACK_MESA	0x8BB1
-	GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA	0x8BB2
-	GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA	0x8BB3
-	GL_FRAGMENT_PROGRAM_POSITION_MESA	0x8BB4
-	GL_FRAGMENT_PROGRAM_CALLBACK_MESA	0x8BB5
-	GL_FRAGMENT_PROGRAM_CALLBACK_FUNC_MESA	0x8BB6
-	GL_FRAGMENT_PROGRAM_CALLBACK_DATA_MESA	0x8BB7
-
 GL_MESA_tile_raster_order
 	GL_TILE_RASTER_ORDER_FIXED_MESA		0x8BB8
 	GL_TILE_RASTER_ORDER_INCREASING_X_MESA	0x8BB9
--- a/meson.build
+++ b/meson.build
@@ -21,6 +21,8 @@
 project('mesa', ['c', 'cpp'], version : '17.3.0-devel', license : 'MIT',
        default_options : ['c_std=c99', 'cpp_std=c++11'])

+error('The meson build is unsupported for building mesa 17.3.x releases.')
+
 # Arguments for the preprocessor, put these in a separate array from the C and
 # C++ (cpp in meson terminology) arguments since they need to be added to the
 # default arguments for both C and C++.
@@ -495,7 +497,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
  pre_args += '-DMAJOR_IN_MKDEV'
 endif

-foreach h : ['xlocale.h', 'sys/sysctl.h']
+foreach h : ['xlocale.h', 'sys/sysctl.h', 'endian.h']
  if cc.has_header(h)
    pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
  endif
@@ -607,7 +609,7 @@ dep_libdrm_amdgpu = []
 dep_libdrm_radeon = []
 dep_libdrm_nouveau = []
 if with_amd_vk or with_gallium_radeonsi
-  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.85')
+  dep_libdrm_amdgpu = dependency('libdrm_amdgpu', version : '>= 2.4.89')
 endif
 if with_gallium_radeonsi # older radeon too
  dep_libdrm_radeon = dependency('libdrm_radeon', version : '>= 2.4.71')
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -352,6 +352,9 @@ def generate(env):
        if check_header(env, 'xlocale.h'):
            cppdefines += ['HAVE_XLOCALE_H']

+        if check_header(env, 'endian.h'):
+            cppdefines += ['HAVE_ENDIAN_H']
+
        if check_functions(env, ['strtod_l', 'strtof_l']):
            cppdefines += ['HAVE_STRTOD_L']

--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -33,7 +33,7 @@ git_sha1.h: git_sha1.h.tmp

 BUILT_SOURCES = git_sha1.h
 CLEANFILES = $(BUILT_SOURCES)
-EXTRA_DIST =
+EXTRA_DIST = git_sha1.h.in

 SUBDIRS = . gtest util mapi/glapi/gen mapi

--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -98,7 +98,9 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 {
 	struct amdgpu_buffer_size_alignments alignment_info = {};
 	struct amdgpu_heap_info vram, vram_vis, gtt;
-	struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {};
+	struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
+	struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {};
+	struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
 	uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
 	int r, i, j;
 	drmDevicePtr devinfo;
@@ -154,6 +156,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		return false;
 	}

+	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_GFX, 0, &gfx);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(gfx) failed.\n");
+		return false;
+	}
+
 	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
 	if (r) {
 		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
@@ -315,6 +323,17 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	if (info->chip_class == SI)
 		info->gfx_ib_pad_with_type2 = TRUE;

+	unsigned ib_align = 0;
+	ib_align = MAX2(ib_align, gfx.ib_start_alignment);
+	ib_align = MAX2(ib_align, compute.ib_start_alignment);
+	ib_align = MAX2(ib_align, dma.ib_start_alignment);
+	ib_align = MAX2(ib_align, uvd.ib_start_alignment);
+	ib_align = MAX2(ib_align, uvd_enc.ib_start_alignment);
+	ib_align = MAX2(ib_align, vce.ib_start_alignment);
+	ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment);
+	ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment);
+	info->ib_start_alignment = ib_align;
+
 	return true;
 }

--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -61,6 +61,7 @@ struct radeon_info {
 	bool                        has_virtual_memory;
 	bool                        gfx_ib_pad_with_type2;
 	bool                        has_hw_decode;
+	unsigned                    ib_start_alignment;
 	uint32_t                    num_sdma_rings;
 	uint32_t                    num_compute_rings;
 	uint32_t                    uvd_fw_version;
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -41,6 +41,16 @@

 #include "shader_enums.h"

+/* Minimum capacity (in entries) that push_flow() allocates for the
+ * control-flow stack the first time it has to grow it.
+ */
+#define AC_LLVM_INITIAL_CF_DEPTH 4
+
+/* Data for if/else/endif and bgnloop/endloop control flow structures.
+ */
+struct ac_llvm_flow {
+	/* Loop exit or next part of if/else/endif. */
+	LLVMBasicBlockRef next_block;
+	/* Loop header block. Stays NULL for if/else/endif entries, which is
+	 * how loop entries are told apart from branch entries. */
+	LLVMBasicBlockRef loop_entry_block;
+};
+
 /* Initialize module-independent parts of the context.
 *
 * The caller is responsible for initializing ctx::module and ctx::builder.
@@ -92,6 +102,14 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }

+/* Release the control-flow stack owned by the context and reset its
+ * capacity so a later push_flow() allocates a fresh array.
+ *
+ * NOTE(review): flow_depth is not reset here; presumably every if/loop has
+ * been closed by the time this is called -- confirm against callers.
+ */
+void
+ac_llvm_context_dispose(struct ac_llvm_context *ctx)
+{
+	free(ctx->flow);
+	ctx->flow = NULL;
+	ctx->flow_depth_max = 0;
+}
+
 unsigned
 ac_get_type_size(LLVMTypeRef type)
 {
@@ -960,6 +978,26 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					  AC_FUNC_ATTR_READONLY);
 }

+/* Variant of ac_build_buffer_load_format() that patches the descriptor
+ * before loading: element 2 of rsrc is replaced by the unsigned maximum of
+ * itself and (element 1 >> 16), which the code treats as element count and
+ * stride respectively. All other arguments are forwarded unchanged.
+ */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+						   LLVMValueRef rsrc,
+						   LLVMValueRef vindex,
+						   LLVMValueRef voffset,
+						   bool can_speculate)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef count_idx = LLVMConstInt(ctx->i32, 2, 0);
+	LLVMValueRef stride_idx = LLVMConstInt(ctx->i32, 1, 0);
+	LLVMValueRef stride_shift = LLVMConstInt(ctx->i32, 16, 0);
+
+	LLVMValueRef count = LLVMBuildExtractElement(b, rsrc, count_idx, "");
+	LLVMValueRef stride_dword = LLVMBuildExtractElement(b, rsrc, stride_idx, "");
+	LLVMValueRef stride = LLVMBuildLShr(b, stride_dword, stride_shift, "");
+
+	/* max(count, stride), unsigned. */
+	LLVMValueRef count_is_larger = LLVMBuildICmp(b, LLVMIntUGT, count, stride, "");
+	LLVMValueRef patched_count = LLVMBuildSelect(b, count_is_larger, count, stride, "");
+
+	LLVMValueRef patched_rsrc = LLVMBuildInsertElement(b, rsrc, patched_count,
+							   count_idx, "");
+
+	return ac_build_buffer_load_format(ctx, patched_rsrc, vindex, voffset,
+					   can_speculate);
+}
+
 /**
 * Set range metadata on an instruction.  This can only be used on load and
 * call instructions.  If you know an instruction can only produce the values
@@ -1742,3 +1780,174 @@ void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
 			   "llvm.amdgcn.init.exec", ctx->voidt,
 			   &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
 }
+
+/* Return the most recently pushed flow entry, or NULL when the control-flow
+ * stack is empty.
+ */
+static struct ac_llvm_flow *
+get_current_flow(struct ac_llvm_context *ctx)
+{
+	return ctx->flow_depth > 0 ? &ctx->flow[ctx->flow_depth - 1] : NULL;
+}
+
+/* Walk the control-flow stack from the top down and return the first entry
+ * that has a loop entry block, or NULL when no loop is open.
+ */
+static struct ac_llvm_flow *
+get_innermost_loop(struct ac_llvm_context *ctx)
+{
+	unsigned depth = ctx->flow_depth;
+
+	while (depth > 0) {
+		struct ac_llvm_flow *entry = &ctx->flow[--depth];
+
+		if (entry->loop_entry_block)
+			return entry;
+	}
+	return NULL;
+}
+
+/* Push a new, zero-initialised entry onto the control-flow stack and return
+ * it. The backing array is grown (doubling, with a minimum of
+ * AC_LLVM_INITIAL_CF_DEPTH entries) when it is full.
+ *
+ * Callers dereference the result unconditionally, so allocation failure is
+ * treated as fatal instead of leaking the old array and writing through a
+ * NULL-based pointer.
+ */
+static struct ac_llvm_flow *
+push_flow(struct ac_llvm_context *ctx)
+{
+	struct ac_llvm_flow *flow;
+
+	if (ctx->flow_depth >= ctx->flow_depth_max) {
+		unsigned new_max = MAX2(ctx->flow_depth << 1,
+					AC_LLVM_INITIAL_CF_DEPTH);
+		/* Keep the old pointer until realloc is known to have
+		 * succeeded. */
+		struct ac_llvm_flow *grown =
+			realloc(ctx->flow, new_max * sizeof(*ctx->flow));
+
+		if (!grown)
+			abort();
+
+		ctx->flow = grown;
+		ctx->flow_depth_max = new_max;
+	}
+
+	flow = &ctx->flow[ctx->flow_depth];
+	ctx->flow_depth++;
+
+	flow->next_block = NULL;
+	flow->loop_entry_block = NULL;
+	return flow;
+}
+
+/* Rename a basic block to "<base><label_id>". The name is formatted into a
+ * 32-byte buffer, so longer results are truncated by snprintf.
+ */
+static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
+				int label_id)
+{
+	char name[32];
+	LLVMValueRef bb_value = LLVMBasicBlockAsValue(bb);
+
+	snprintf(name, sizeof(name), "%s%d", base, label_id);
+	LLVMSetValueName(bb_value, name);
+}
+
+/* Create a basic block at the level of the parent flow.
+ *
+ * With at least two entries on the control-flow stack the new block is
+ * inserted before the parent entry's next_block; otherwise it is appended
+ * to the function that contains the builder's current insert block.
+ * Requires that a flow entry has already been pushed.
+ */
+static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
+					    const char *name)
+{
+	assert(ctx->flow_depth >= 1);
+
+	if (ctx->flow_depth < 2) {
+		LLVMBasicBlockRef current = LLVMGetInsertBlock(ctx->builder);
+		LLVMValueRef fn = LLVMGetBasicBlockParent(current);
+
+		return LLVMAppendBasicBlockInContext(ctx->context, fn, name);
+	}
+
+	struct ac_llvm_flow *parent = &ctx->flow[ctx->flow_depth - 2];
+
+	return LLVMInsertBasicBlockInContext(ctx->context, parent->next_block,
+					     name);
+}
+
+/* Terminate the current block with a branch to the given default target,
+ * unless the block already ends in a terminator (for instance the branch
+ * emitted by a break or continue).
+ */
+static void emit_default_branch(LLVMBuilderRef builder,
+				LLVMBasicBlockRef target)
+{
+	LLVMBasicBlockRef current = LLVMGetInsertBlock(builder);
+
+	if (LLVMGetBasicBlockTerminator(current))
+		return;
+
+	LLVMBuildBr(builder, target);
+}
+
+/* Open a loop.
+ *
+ * Pushes a flow entry, creates the loop header block and the loop exit
+ * block, branches from the current block into the header and positions the
+ * builder at the end of the header. The header is named "loop<label_id>".
+ */
+void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *flow = push_flow(ctx);
+	flow->loop_entry_block = append_basic_block(ctx, "LOOP");
+	flow->next_block = append_basic_block(ctx, "ENDLOOP");
+	set_basicblock_name(flow->loop_entry_block, "loop", label_id);
+	LLVMBuildBr(ctx->builder, flow->loop_entry_block);
+	LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
+}
+
+/* Emit a branch to the exit block of the innermost open loop.
+ *
+ * NOTE(review): get_innermost_loop() returns NULL when no loop is open and
+ * the result is dereferenced unchecked -- callers presumably only use this
+ * inside a loop; confirm.
+ */
+void ac_build_break(struct ac_llvm_context *ctx)
+{
+	LLVMBuildBr(ctx->builder, get_innermost_loop(ctx)->next_block);
+}
+
+/* Emit a branch back to the header block of the innermost open loop.
+ *
+ * NOTE(review): get_innermost_loop() returns NULL when no loop is open and
+ * the result is dereferenced unchecked -- callers presumably only use this
+ * inside a loop; confirm.
+ */
+void ac_build_continue(struct ac_llvm_context *ctx)
+{
+	LLVMBuildBr(ctx->builder, get_innermost_loop(ctx)->loop_entry_block);
+}
+
+/* Switch from the "then" part of the current if to its "else" part.
+ *
+ * A new ENDIF block is created and the current block branches to it unless
+ * it already has a terminator. Emission then continues in the block that
+ * was stored as next_block, which is renamed "else<label_id>"; next_block
+ * is updated to the new ENDIF block so ac_build_endif() lands there.
+ * The current flow entry must be an if, not a loop.
+ */
+void ac_build_else(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *current_branch = get_current_flow(ctx);
+	LLVMBasicBlockRef endif_block;
+
+	assert(!current_branch->loop_entry_block);
+
+	endif_block = append_basic_block(ctx, "ENDIF");
+	emit_default_branch(ctx->builder, endif_block);
+
+	LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
+	set_basicblock_name(current_branch->next_block, "else", label_id);
+
+	current_branch->next_block = endif_block;
+}
+
+/* Close the current if.
+ *
+ * The current block branches to next_block unless it already has a
+ * terminator, the builder is positioned at the end of next_block, that
+ * block is renamed "endif<label_id>", and the flow entry is popped.
+ * The current flow entry must be an if, not a loop.
+ */
+void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *current_branch = get_current_flow(ctx);
+
+	assert(!current_branch->loop_entry_block);
+
+	emit_default_branch(ctx->builder, current_branch->next_block);
+	LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
+	set_basicblock_name(current_branch->next_block, "endif", label_id);
+
+	ctx->flow_depth--;
+}
+
+/* Close the current loop.
+ *
+ * The current block branches back to the loop header unless it already has
+ * a terminator, the builder is positioned at the end of the loop exit
+ * block, that block is renamed "endloop<label_id>", and the flow entry is
+ * popped. The current flow entry must be a loop.
+ */
+void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *current_loop = get_current_flow(ctx);
+
+	assert(current_loop->loop_entry_block);
+
+	emit_default_branch(ctx->builder, current_loop->loop_entry_block);
+
+	LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
+	set_basicblock_name(current_loop->next_block, "endloop", label_id);
+	ctx->flow_depth--;
+}
+
+/* Shared implementation of ac_build_if() and ac_build_uif().
+ *
+ * Pushes a flow entry, creates the "if" block (named "if<label_id>") and
+ * the block stored as next_block, emits a conditional branch on cond between
+ * the two, and positions the builder at the end of the "if" block.
+ */
+static void if_cond_emit(struct ac_llvm_context *ctx, LLVMValueRef cond,
+			 int label_id)
+{
+	struct ac_llvm_flow *flow = push_flow(ctx);
+	LLVMBasicBlockRef if_block;
+
+	if_block = append_basic_block(ctx, "IF");
+	flow->next_block = append_basic_block(ctx, "ELSE");
+	set_basicblock_name(if_block, "if", label_id);
+	LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
+	LLVMPositionBuilderAtEnd(ctx->builder, if_block);
+}
+
+/* Open an if whose condition is a float value: the "if" part is taken when
+ * value compares unordered-or-not-equal (LLVMRealUNE) against ctx->f32_0.
+ */
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
+		 int label_id)
+{
+	if_cond_emit(ctx,
+		     LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value,
+				   ctx->f32_0, ""),
+		     label_id);
+}
+
+/* Open an if whose condition is an integer value: value is converted with
+ * ac_to_integer() and the "if" part is taken when it differs from
+ * ctx->i32_0.
+ */
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
+		  int label_id)
+{
+	LLVMValueRef as_int = ac_to_integer(ctx, value);
+	LLVMValueRef is_nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE,
+						as_int, ctx->i32_0, "");
+
+	if_cond_emit(ctx, is_nonzero, label_id);
+}
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -34,6 +34,8 @@
 extern "C" {
 #endif

+struct ac_llvm_flow;
+
 struct ac_llvm_context {
 	LLVMContextRef context;
 	LLVMModuleRef module;
@@ -57,6 +59,10 @@ struct ac_llvm_context {
 	LLVMValueRef f32_0;
 	LLVMValueRef f32_1;

+	struct ac_llvm_flow *flow;
+	unsigned flow_depth;
+	unsigned flow_depth_max;
+
 	unsigned range_md_kind;
 	unsigned invariant_load_md_kind;
 	unsigned uniform_md_kind;
@@ -71,6 +77,9 @@ void
 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
 		     enum chip_class chip_class);

+void
+ac_llvm_context_dispose(struct ac_llvm_context *ctx);
+
 unsigned ac_get_type_size(LLVMTypeRef type);

 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
@@ -188,6 +197,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					 LLVMValueRef voffset,
 					 bool can_speculate);

+/* load_format that handles the stride & element count better if idxen is
+ * disabled by LLVM. */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+						   LLVMValueRef rsrc,
+						   LLVMValueRef vindex,
+						   LLVMValueRef voffset,
+						   bool can_speculate);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);

@@ -282,6 +299,18 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
 			    uint32_t num_outputs,
 			    uint8_t *num_param_exports);
 void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
+
+/* Structured control-flow helpers. label_id is appended to the names of the
+ * basic blocks the helper creates or enters ("loop", "else", "endif",
+ * "endloop", "if").
+ */
+void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id);
+void ac_build_break(struct ac_llvm_context *ctx);
+void ac_build_continue(struct ac_llvm_context *ctx);
+void ac_build_else(struct ac_llvm_context *ctx, int label_id);
+void ac_build_endif(struct ac_llvm_context *ctx, int label_id);
+void ac_build_endloop(struct ac_llvm_context *ctx, int label_id);
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
+		 int label_id);
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
+		  int label_id);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -562,7 +562,30 @@ struct user_sgpr_info {
 	bool indirect_all_descriptor_sets;
 };

+/* Decide whether the given shader stage needs a user SGPR for the multiview
+ * view index.
+ *
+ * Every handled stage needs it when the shader itself reads the view index
+ * (info.needs_multiview_view_index). In addition, a vertex shader that is
+ * compiled neither as ES nor as LS, and a tess-eval shader that is not
+ * compiled as ES, need it when the pipeline key has
+ * has_multiview_view_index set. All other stages return false.
+ */
+static bool needs_view_index_sgpr(struct nir_to_llvm_context *ctx,
+				  gl_shader_stage stage)
+{
+	switch (stage) {
+	case MESA_SHADER_VERTEX:
+		if (ctx->shader_info->info.needs_multiview_view_index ||
+		    (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
+			return true;
+		break;
+	case MESA_SHADER_TESS_EVAL:
+		if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
+			return true;
+		/* Explicit break: do not fall through into the GS/TCS case. */
+		break;
+	case MESA_SHADER_GEOMETRY:
+	case MESA_SHADER_TESS_CTRL:
+		if (ctx->shader_info->info.needs_multiview_view_index)
+			return true;
+		break;
+	default:
+		break;
+	}
+	return false;
+}
+
 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
+				bool needs_view_index,
 				struct user_sgpr_info *user_sgpr_info)
 {
 	memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
@@ -616,6 +639,9 @@ static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
 		break;
 	}

+	if (needs_view_index)
+		user_sgpr_info->sgpr_count++;
+
 	if (ctx->shader_info->info.needs_push_constants)
 		user_sgpr_info->sgpr_count += 2;

@@ -745,8 +771,8 @@ static void create_function(struct nir_to_llvm_context *ctx,
 	struct user_sgpr_info user_sgpr_info;
 	struct arg_info args = {};
 	LLVMValueRef desc_sets;
-
-	allocate_user_sgprs(ctx, &user_sgpr_info);
+	bool needs_view_index = needs_view_index_sgpr(ctx, stage);
+	allocate_user_sgprs(ctx, needs_view_index, &user_sgpr_info);

 	if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
 		add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */
@@ -764,7 +790,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
 	case MESA_SHADER_VERTEX:
 		radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
 		radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
-		if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
+		if (needs_view_index)
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
 		if (ctx->options->key.vs.as_es)
 			add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset
@@ -796,7 +822,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
-			if (ctx->shader_info->info.needs_multiview_view_index)
+			if (needs_view_index)
 				add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);

 			add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
@@ -811,7 +837,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
-			if (ctx->shader_info->info.needs_multiview_view_index)
+			if (needs_view_index)
 				add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
 			add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
 			add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
@@ -822,8 +848,9 @@ static void create_function(struct nir_to_llvm_context *ctx,
 	case MESA_SHADER_TESS_EVAL:
 		radv_define_common_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_info, &args, &desc_sets);
 		add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
-		if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
+		if (needs_view_index)
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
+
 		if (ctx->options->key.tes.as_es) {
 			add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS
 			add_sgpr_argument(&args, ctx->i32, NULL); //
@@ -855,7 +882,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
 				radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entires
-			if (ctx->shader_info->info.needs_multiview_view_index)
+			if (needs_view_index)
 				add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);

 			add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx01
@@ -880,7 +907,7 @@ static void create_function(struct nir_to_llvm_context *ctx,
 			radv_define_vs_user_sgprs_phase1(ctx, stage, has_previous_stage, previous_stage, &args);
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
 			add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entires
-			if (ctx->shader_info->info.needs_multiview_view_index)
+			if (needs_view_index)
 				add_user_sgpr_argument(&args, ctx->i32, &ctx->view_index);
 			add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset
 			add_sgpr_argument(&args, ctx->i32, &ctx->gs_wave_id); // wave id
@@ -1259,7 +1286,8 @@ static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
 {
 	LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
 				       ctx->i32_0, "");
-	return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
+	return LLVMBuildSelect(ctx->builder, v, ac_to_integer(ctx, src1),
+			       ac_to_integer(ctx, src2), "");
 }

 static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
@@ -1517,23 +1545,13 @@ static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
 					LLVMValueRef src0)
 {
-	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
-	int i;
 	LLVMValueRef comp[2];

 	src0 = ac_to_float(ctx, src0);
 	comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
 	comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
-	for (i = 0; i < 2; i++) {
-		comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
-		comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
-		comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
-	}

-	comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
-	comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
-
-	return comp[0];
+	return ac_build_cvt_pkrtz_f16(ctx, comp);
 }

 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
@@ -1756,16 +1774,16 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
 		break;
 	case nir_op_feq:
-		result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
+		result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
 		break;
 	case nir_op_fne:
 		result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
 		break;
 	case nir_op_flt:
-		result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
+		result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
 		break;
 	case nir_op_fge:
-		result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
+		result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
 		break;
 	case nir_op_fabs:
 		result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
@@ -2239,11 +2257,19 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 					struct ac_image_args *args)
 {
 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-		return ac_build_buffer_load_format(&ctx->ac,
-						   args->resource,
-						   args->addr,
-						   LLVMConstInt(ctx->ac.i32, 0, false),
-						   true);
+		if (ctx->abi->gfx9_stride_size_workaround) {
+			return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
+								     args->resource,
+								     args->addr,
+								     ctx->ac.i32_0,
+								     true);
+		} else {
+			return ac_build_buffer_load_format(&ctx->ac,
+							   args->resource,
+							   args->addr,
+							   ctx->ac.i32_0,
+							   true);
+		}
 	}

 	args->opcode = ac_image_sample;
@@ -2353,6 +2379,46 @@ static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,

 	return get_buffer_size(ctx, desc, false);
 }
+
+/* Expand every set bit i of mask into a run of `multiplier` consecutive set
+ * bits starting at bit i * multiplier, and return the union of those runs.
+ *
+ * NOTE(review): the shifts are only defined while multiplier < 32 and
+ * i * multiplier < 32 for every set bit -- presumably guaranteed by the
+ * caller's small write masks; confirm.
+ */
+static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
+{
+	uint32_t widened = 0;
+	uint32_t remaining = mask;
+
+	/* Stop once no higher bits are left in the mask. */
+	for (unsigned bit = 0; remaining != 0; ++bit, remaining >>= 1) {
+		if (remaining & 1u)
+			widened |= ((1u << multiplier) - 1u) << (bit * multiplier);
+	}
+	return widened;
+}
+
+/* Return elements [start, start + count) of src.
+ *
+ * A non-vector src only supports the trivial range (start 0, count 1) and is
+ * returned as is, as is a vector when the range covers all of it. A single
+ * element is produced with an extractelement; longer ranges (at most 8
+ * elements) with a shufflevector of src with itself.
+ */
+static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
+                                         unsigned start, unsigned count)
+{
+	LLVMTypeRef src_type = LLVMTypeOf(src);
+
+	/* Scalars only have the trivial range. */
+	if (LLVMGetTypeKind(src_type) != LLVMVectorTypeKind) {
+		assert(start == 0);
+		assert(count == 1);
+		return src;
+	}
+
+	unsigned width = LLVMGetVectorSize(src_type);
+	assert(start < width);
+	assert(start + count <= width);
+
+	if (count == width && start == 0)
+		return src;
+
+	if (count == 1) {
+		LLVMValueRef lane = LLVMConstInt(ctx->i32, start, false);
+
+		return LLVMBuildExtractElement(ctx->builder, src, lane, "");
+	}
+
+	assert(count <= 8);
+	LLVMValueRef lanes[8];
+	for (unsigned n = 0; n < count; ++n)
+		lanes[n] = LLVMConstInt(ctx->i32, start + n, false);
+
+	return LLVMBuildShuffleVector(ctx->builder, src, src,
+				      LLVMConstVector(lanes, count), "");
+}
+
 static void visit_store_ssbo(struct ac_nir_context *ctx,
                             nir_intrinsic_instr *instr)
 {
@@ -2375,6 +2441,8 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 	if (components_32bit > 1)
 		data_type = LLVMVectorType(ctx->ac.f32, components_32bit);

+	writemask = widen_mask(writemask, elem_size_mult);
+
 	base_data = ac_to_float(&ctx->ac, src_data);
 	base_data = trim_vector(&ctx->ac, base_data, instr->num_components);
 	base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
@@ -2384,7 +2452,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 		int start, count;
 		LLVMValueRef data;
 		LLVMValueRef offset;
-		LLVMValueRef tmp;
+
 		u_bit_scan_consecutive_range(&writemask, &start, &count);

 		/* Due to an LLVM limitation, split 3-element writes
@@ -2394,9 +2462,6 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 			count = 2;
 		}

-		start *= elem_size_mult;
-		count *= elem_size_mult;
-
 		if (count > 4) {
 			writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
 			count = 4;
@@ -2404,30 +2469,14 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,

 		if (count == 4) {
 			store_name = "llvm.amdgcn.buffer.store.v4f32";
-			data = base_data;
 		} else if (count == 2) {
-			LLVMTypeRef v2f32 = LLVMVectorType(ctx->ac.f32, 2);
-
-			tmp = LLVMBuildExtractElement(ctx->ac.builder,
-						      base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
-			data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(v2f32), tmp,
-						      ctx->ac.i32_0, "");
-
-			tmp = LLVMBuildExtractElement(ctx->ac.builder,
-						      base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
-			data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
-						      ctx->ac.i32_1, "");
 			store_name = "llvm.amdgcn.buffer.store.v2f32";

 		} else {
 			assert(count == 1);
-			if (get_llvm_num_components(base_data) > 1)
-				data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
-							       LLVMConstInt(ctx->ac.i32, start, false), "");
-			else
-				data = base_data;
 			store_name = "llvm.amdgcn.buffer.store.f32";
 		}
+		data = extract_vector_range(&ctx->ac, base_data, start, count);

 		offset = base_offset;
 		if (start != 0) {
@@ -2537,8 +2586,11 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
 			i1false,
 		};

-		results[i] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
+		int idx = i;
+		if (instr->dest.ssa.bit_size == 64)
+			idx = i > 1 ? 1 : 0;

+		results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
 	}

 	LLVMValueRef ret = results[0];
@@ -2805,7 +2857,7 @@ get_dw_address(struct nir_to_llvm_context *ctx,
 						    LLVMConstInt(ctx->i32, 4, false), ""), "");
 	else if (const_index && !compact_const_index)
 		dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
-				       LLVMConstInt(ctx->i32, const_index, false), "");
+				       LLVMConstInt(ctx->i32, const_index * 4, false), "");

 	dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
 			       LLVMConstInt(ctx->i32, param * 4, false), "");
@@ -3083,6 +3135,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 	LLVMValueRef indir_index;
 	LLVMValueRef ret;
 	unsigned const_index;
+	unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
 	bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
 	             instr->variables[0]->var->data.mode == nir_var_shader_in;
 	get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
@@ -3108,13 +3161,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 				count -= chan / 4;
 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 						&ctx->ac, ctx->abi->inputs + idx + chan, count,
-						4, false, true);
+						stride, false, true);

 				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 								       tmp_vec,
 								       indir_index, "");
 			} else
-				values[chan] = ctx->abi->inputs[idx + chan + const_index * 4];
+				values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
 		}
 		break;
 	case nir_var_local:
@@ -3125,13 +3178,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 				count -= chan / 4;
 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 						&ctx->ac, ctx->locals + idx + chan, count,
-						4, true, true);
+						stride, true, true);

 				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 								       tmp_vec,
 								       indir_index, "");
 			} else {
-				values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], "");
+				values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
 			}
 		}
 		break;
@@ -3153,14 +3206,14 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 				count -= chan / 4;
 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
 						&ctx->ac, ctx->outputs + idx + chan, count,
-						4, true, true);
+						stride, true, true);

 				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
 								       tmp_vec,
 								       indir_index, "");
 			} else {
 				values[chan] = LLVMBuildLoad(ctx->ac.builder,
-						     ctx->outputs[idx + chan + const_index * 4],
+						     ctx->outputs[idx + chan + const_index * stride],
 						     "");
 			}
 		}
@@ -3186,17 +3239,12 @@ visit_store_var(struct ac_nir_context *ctx,
 		         NULL, NULL, &const_index, &indir_index);

 	if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
-		int old_writemask = writemask;

 		src = LLVMBuildBitCast(ctx->ac.builder, src,
 		                       LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2),
 		                       "");

-		writemask = 0;
-		for (unsigned chan = 0; chan < 4; chan++) {
-			if (old_writemask & (1 << chan))
-				writemask |= 3u << (2 * chan);
-		}
+		writemask = widen_mask(writemask, 2);
 	}

 	switch (instr->variables[0]->var->data.mode) {
@@ -3573,8 +3621,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
 		glc = i1true;

 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
+		LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
+
+		if (ctx->abi->gfx9_stride_size_workaround) {
+			LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
+			LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
+			stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
+
+			LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
+			                                              LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
+			                                              elem_count, stride, "");
+
+			rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
+			                              LLVMConstInt(ctx->ac.i32, 2, 0), "");
+		}
+
 		params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
-		params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
+		params[1] = rsrc;
 		params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
 						    ctx->ac.i32_0, ""); /* vindex */
 		params[3] = ctx->ac.i32_0; /* voffset */
@@ -3631,15 +3694,17 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
 	MAYBE_UNUSED int length;

+	bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
+
 	switch (instr->intrinsic) {
 	case nir_intrinsic_image_atomic_add:
 		atomic_name = "add";
 		break;
 	case nir_intrinsic_image_atomic_min:
-		atomic_name = "smin";
+		atomic_name = is_unsigned ? "umin" : "smin";
 		break;
 	case nir_intrinsic_image_atomic_max:
-		atomic_name = "smax";
+		atomic_name = is_unsigned ? "umax" : "smax";
 		break;
 	case nir_intrinsic_image_atomic_and:
 		atomic_name = "and";
@@ -4733,7 +4798,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 			/* This seems like a bit of a hack - but it passes Vulkan CTS with it */
 			if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
 			    instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
-			    instr->op != nir_texop_txf) {
+			    instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
 				coords[2] = apply_round_slice(&ctx->ac, coords[2]);
 			}
 			address[count++] = coords[2];
@@ -4908,27 +4973,26 @@ static void visit_ssa_undef(struct ac_nir_context *ctx,
 			    const nir_ssa_undef_instr *instr)
 {
 	unsigned num_components = instr->def.num_components;
+	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
 	LLVMValueRef undef;

 	if (num_components == 1)
-		undef = LLVMGetUndef(ctx->ac.i32);
+		undef = LLVMGetUndef(type);
 	else {
-		undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, num_components));
+		undef = LLVMGetUndef(LLVMVectorType(type, num_components));
 	}
 	_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
 }

-static void visit_jump(struct ac_nir_context *ctx,
+static void visit_jump(struct ac_llvm_context *ctx,
 		       const nir_jump_instr *instr)
 {
 	switch (instr->type) {
 	case nir_jump_break:
-		LLVMBuildBr(ctx->ac.builder, ctx->break_block);
-		LLVMClearInsertionPosition(ctx->ac.builder);
+		ac_build_break(ctx);
 		break;
 	case nir_jump_continue:
-		LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
-		LLVMClearInsertionPosition(ctx->ac.builder);
+		ac_build_continue(ctx);
 		break;
 	default:
 		fprintf(stderr, "Unknown NIR jump instr: ");
@@ -4966,7 +5030,7 @@ static void visit_block(struct ac_nir_context *ctx, nir_block *block)
 			visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
 			break;
 		case nir_instr_type_jump:
-			visit_jump(ctx, nir_instr_as_jump(instr));
+			visit_jump(&ctx->ac, nir_instr_as_jump(instr));
 			break;
 		default:
 			fprintf(stderr, "Unknown NIR instr type: ");
@@ -4983,56 +5047,34 @@ static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
 {
 	LLVMValueRef value = get_src(ctx, if_stmt->condition);

-	LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
-	LLVMBasicBlockRef merge_block =
-	    LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
-	LLVMBasicBlockRef if_block =
-	    LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
-	LLVMBasicBlockRef else_block = merge_block;
-	if (!exec_list_is_empty(&if_stmt->else_list))
-		else_block = LLVMAppendBasicBlockInContext(
-		    ctx->ac.context, fn, "");
+	nir_block *then_block =
+		(nir_block *) exec_list_get_head(&if_stmt->then_list);

-	LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
-	                                  LLVMConstInt(ctx->ac.i32, 0, false), "");
-	LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
+	ac_build_uif(&ctx->ac, value, then_block->index);

-	LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
 	visit_cf_list(ctx, &if_stmt->then_list);
-	if (LLVMGetInsertBlock(ctx->ac.builder))
-		LLVMBuildBr(ctx->ac.builder, merge_block);

 	if (!exec_list_is_empty(&if_stmt->else_list)) {
-		LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
+		nir_block *else_block =
+			(nir_block *) exec_list_get_head(&if_stmt->else_list);
+
+		ac_build_else(&ctx->ac, else_block->index);
 		visit_cf_list(ctx, &if_stmt->else_list);
-		if (LLVMGetInsertBlock(ctx->ac.builder))
-			LLVMBuildBr(ctx->ac.builder, merge_block);
 	}

-	LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
+	ac_build_endif(&ctx->ac, then_block->index);
 }

 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
 {
-	LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
-	LLVMBasicBlockRef continue_parent = ctx->continue_block;
-	LLVMBasicBlockRef break_parent = ctx->break_block;
+	nir_block *first_loop_block =
+		(nir_block *) exec_list_get_head(&loop->body);

-	ctx->continue_block =
-	    LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
-	ctx->break_block =
-	    LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
+	ac_build_bgnloop(&ctx->ac, first_loop_block->index);

-	LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
-	LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
 	visit_cf_list(ctx, &loop->body);

-	if (LLVMGetInsertBlock(ctx->ac.builder))
-		LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
-	LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
-
-	ctx->continue_block = continue_parent;
-	ctx->break_block = break_parent;
+	ac_build_endloop(&ctx->ac, first_loop_block->index);
 }

 static void visit_cf_list(struct ac_nir_context *ctx,
@@ -5074,16 +5116,16 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,

 	variable->data.driver_location = idx * 4;

-	if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
-		buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
-					    ctx->abi.start_instance, "");
-		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
-		                            ctx->shader_info->vs.vgpr_comp_cnt);
-	} else
-		buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
-					    ctx->abi.base_vertex, "");
-
 	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
+		if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
+			buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
+						    ctx->abi.start_instance, "");
+			ctx->shader_info->vs.vgpr_comp_cnt =
+				MAX2(3, ctx->shader_info->vs.vgpr_comp_cnt);
+		} else
+			buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
+						    ctx->abi.base_vertex, "");
+
 		t_offset = LLVMConstInt(ctx->i32, index + i, false);

 		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
@@ -5454,6 +5496,7 @@ setup_locals(struct ac_nir_context *ctx,
 	nir_foreach_variable(variable, &func->impl->locals) {
 		unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
 		variable->data.driver_location = ctx->num_locals * 4;
+		variable->data.location_frac = 0;
 		ctx->num_locals += attrib_count;
 	}
 	ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
@@ -5895,7 +5938,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx,
 	}

 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
-		LLVMValueRef dw_addr;
+		LLVMValueRef dw_addr = NULL;
 		LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
 		int param_index;
 		int length = 4;
@@ -6385,6 +6428,8 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)

 	LLVMDisposeBuilder(ctx->builder);
 	LLVMDisposePassManager(passmgr);
+
+	ac_llvm_context_dispose(&ctx->ac);
 }

 static void
@@ -6601,6 +6646,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	ctx.abi.load_ssbo = radv_load_ssbo;
 	ctx.abi.load_sampler_desc = radv_get_sampler_desc;
 	ctx.abi.clamp_shadow_reference = false;
+	ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;

 	if (shader_count >= 2)
 		ac_init_exec_full_mask(&ctx.ac);
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -92,6 +92,10 @@ struct ac_shader_abi {
 	/* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
 	 * uses it due to promoting D16 to D32, but radv needs it off. */
 	bool clamp_shadow_reference;
+
+	/* Whether to work around GFX9 ignoring the stride for the buffer size if IDXEN=0
+	 * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
+	bool gfx9_stride_size_workaround;
 };

 #endif /* AC_SHADER_ABI_H */
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -927,9 +927,11 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 		    in->numSamples == 1) {
 			ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
 			ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
+			ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};

 			din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
 			dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
+			dout.pMipInfo = meta_mip_info;

 			din.dccKeyFlags.pipeAligned = 1;
 			din.dccKeyFlags.rbAligned = 1;
@@ -955,21 +957,37 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 			surf->dcc_alignment = dout.dccRamBaseAlign;
 			surf->num_dcc_levels = in->numMipLevels;

-			/* Disable DCC for the smallest levels. It seems to be
-			 * required for DCC readability between CB and shaders
-			 * when TC L2 isn't flushed. This was guessed.
+			/* Disable DCC for levels that are in the mip tail.
+			 *
+			 * There are two issues that this is intended to
+			 * address:
+			 *
+			 * 1. Multiple mip levels may share a cache line. This
+			 *    can lead to corruption when switching between
+			 *    rendering to different mip levels because the
+			 *    RBs don't maintain coherency.
+			 *
+			 * 2. Texturing with metadata after rendering sometimes
+			 *    fails with corruption, probably for a similar
+			 *    reason.
+			 *
+			 * Working around these issues for all levels in the
+			 * mip tail may be overly conservative, but it's what
+			 * Vulkan does.
 			 *
 			 * Alternative solutions that also work but are worse:
-			 * - Disable DCC.
+			 * - Disable DCC entirely.
 			 * - Flush TC L2 after rendering.
 			 */
-			for (unsigned i = 1; i < in->numMipLevels; i++) {
-				if (mip_info[i].pitch *
-				    mip_info[i].height * surf->bpe < 1024) {
+			for (unsigned i = 0; i < in->numMipLevels; i++) {
+				if (meta_mip_info[i].inMiptail) {
 					surf->num_dcc_levels = i;
 					break;
 				}
 			}
+
+			if (!surf->num_dcc_levels)
+				surf->dcc_size = 0;
 		}

 		/* FMASK */
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -380,7 +380,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer)
 		flags = RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
 			RADV_CMD_FLAG_CS_PARTIAL_FLUSH;

-		si_cs_emit_cache_flush(cmd_buffer->cs, false,
+		si_cs_emit_cache_flush(cmd_buffer->cs,
 				       cmd_buffer->device->physical_device->rad_info.chip_class,
 				       NULL, 0,
 				       radv_cmd_buffer_uses_mec(cmd_buffer),
@@ -541,7 +541,8 @@ radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 	radeon_set_context_reg(cmd_buffer->cs, R_028804_DB_EQAA, ms->db_eqaa);
 	radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);

-	if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+	if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples &&
+	    old_pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions == pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions)
 		return;

 	radeon_set_context_reg_seq(cmd_buffer->cs, R_028BDC_PA_SC_LINE_CNTL, 2);
@@ -918,7 +919,6 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radv_shader_variant *ps;
 	uint64_t va;
-	unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);

@@ -940,13 +940,10 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer,
 	radeon_set_context_reg(cmd_buffer->cs, R_0286D0_SPI_PS_INPUT_ADDR,
 			       ps->config.spi_ps_input_addr);

-	if (ps->info.info.ps.force_persample)
-		spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
-
 	radeon_set_context_reg(cmd_buffer->cs, R_0286D8_SPI_PS_IN_CONTROL,
 			       S_0286D8_NUM_INTERP(ps->info.fs.num_interp));

-	radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, spi_baryc_cntl);
+	radeon_set_context_reg(cmd_buffer->cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);

 	radeon_set_context_reg(cmd_buffer->cs, R_028710_SPI_SHADER_Z_FORMAT,
 			       pipeline->graphics.shader_z_format);
@@ -1918,11 +1915,11 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
 		switch ((VkAccessFlagBits)(1 << b)) {
 		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
 		case VK_ACCESS_INDEX_READ_BIT:
-		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
 			break;
 		case VK_ACCESS_UNIFORM_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
 			break;
+		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
 		case VK_ACCESS_SHADER_READ_BIT:
 		case VK_ACCESS_TRANSFER_READ_BIT:
 		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
@@ -2438,6 +2435,8 @@ VkResult radv_EndCommandBuffer(
 		si_emit_cache_flush(cmd_buffer);
 	}

+	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+
 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;

@@ -3580,7 +3579,8 @@ void radv_CmdEndRenderPass(

 /*
 * For HTILE we have the following interesting clear words:
- *   0x0000030f: Uncompressed.
+ *   0xfffff30f: Uncompressed, full depth range, for depth+stencil HTILE
+ *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
 *   0xfffffff0: Clear depth to 1.0
 *   0x00000000: Clear depth to 0.0
 */
@@ -3629,7 +3629,8 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
 		radv_initialize_htile(cmd_buffer, image, range, 0);
 	} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
 	           radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
-		radv_initialize_htile(cmd_buffer, image, range, 0xffffffff);
+		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
+		radv_initialize_htile(cmd_buffer, image, range, clear_value);
 	} else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
 	           !radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
 		VkImageSubresourceRange local_range = *range;
@@ -3831,7 +3832,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
 	si_cs_emit_write_event_eop(cs,
 				   cmd_buffer->state.predicating,
 				   cmd_buffer->device->physical_device->rad_info.chip_class,
-				   false,
+				   radv_cmd_buffer_uses_mec(cmd_buffer),
 				   V_028A90_BOTTOM_OF_PIPE_TS, 0,
 				   1, va, 2, value);

--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -739,8 +739,59 @@ void radv_update_descriptor_sets(
 		}

 	}
-	if (descriptorCopyCount)
-		radv_finishme("copy descriptors");
+
+	for (i = 0; i < descriptorCopyCount; i++) {
+		const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
+		RADV_FROM_HANDLE(radv_descriptor_set, src_set,
+		                 copyset->srcSet);
+		RADV_FROM_HANDLE(radv_descriptor_set, dst_set,
+		                 copyset->dstSet);
+		const struct radv_descriptor_set_binding_layout *src_binding_layout =
+			src_set->layout->binding + copyset->srcBinding;
+		const struct radv_descriptor_set_binding_layout *dst_binding_layout =
+			dst_set->layout->binding + copyset->dstBinding;
+		uint32_t *src_ptr = src_set->mapped_ptr;
+		uint32_t *dst_ptr = dst_set->mapped_ptr;
+		struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+		struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
+
+		src_ptr += src_binding_layout->offset / 4;
+		dst_ptr += dst_binding_layout->offset / 4;
+
+		src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
+		dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
+
+		src_buffer_list += src_binding_layout->buffer_offset;
+		src_buffer_list += copyset->srcArrayElement;
+
+		dst_buffer_list += dst_binding_layout->buffer_offset;
+		dst_buffer_list += copyset->dstArrayElement;
+
+		for (j = 0; j < copyset->descriptorCount; ++j) {
+			switch (src_binding_layout->type) {
+			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+			case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+				unsigned src_idx = copyset->srcArrayElement + j;
+				unsigned dst_idx = copyset->dstArrayElement + j;
+				struct radv_descriptor_range *src_range, *dst_range;
+				src_idx += src_binding_layout->dynamic_offset_offset;
+				dst_idx += dst_binding_layout->dynamic_offset_offset;
+
+				src_range = src_set->dynamic_descriptors + src_idx;
+				dst_range = dst_set->dynamic_descriptors + dst_idx;
+				*dst_range = *src_range;
+				break;
+			}
+			default:
+				memcpy(dst_ptr, src_ptr, src_binding_layout->size);
+			}
+			src_ptr += src_binding_layout->size / 4;
+			dst_ptr += dst_binding_layout->size / 4;
+			/* Index by j only: also advancing the list pointers
+			 * would step two entries per descriptor. */
+			dst_buffer_list[j] = src_buffer_list[j];
+		}
+	}
 }

 void radv_UpdateDescriptorSets(
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -76,32 +76,112 @@ radv_get_device_uuid(struct radeon_info *info, void *uuid)
 	ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
 }

-static const char *
-get_chip_name(enum radeon_family family)
+static void
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
 {
+	const char *chip_string;
+	char llvm_string[32] = {};
+
 	switch (family) {
-	case CHIP_TAHITI: return "AMD RADV TAHITI";
-	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
-	case CHIP_VERDE: return "AMD RADV CAPE VERDE";
-	case CHIP_OLAND: return "AMD RADV OLAND";
-	case CHIP_HAINAN: return "AMD RADV HAINAN";
-	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
-	case CHIP_KAVERI: return "AMD RADV KAVERI";
-	case CHIP_KABINI: return "AMD RADV KABINI";
-	case CHIP_HAWAII: return "AMD RADV HAWAII";
-	case CHIP_MULLINS: return "AMD RADV MULLINS";
-	case CHIP_TONGA: return "AMD RADV TONGA";
-	case CHIP_ICELAND: return "AMD RADV ICELAND";
-	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
-	case CHIP_FIJI: return "AMD RADV FIJI";
-	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
-	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
-	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
-	case CHIP_STONEY: return "AMD RADV STONEY";
-	case CHIP_VEGA10: return "AMD RADV VEGA";
-	case CHIP_RAVEN: return "AMD RADV RAVEN";
-	default: return "AMD RADV unknown";
+	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
+	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
+	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
+	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
+	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
+	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
+	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
+	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
+	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
+	case CHIP_MULLINS: chip_string = "AMD RADV MULLINS"; break;
+	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
+	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
+	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
+	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
+	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
+	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
+	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
+	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
+	case CHIP_VEGA10: chip_string = "AMD RADV VEGA"; break;
+	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
+	default: chip_string = "AMD RADV unknown"; break;
 	}
+
+	if (HAVE_LLVM > 0) {
+		snprintf(llvm_string, sizeof(llvm_string),
+			 " (LLVM %i.%i.%i)", (HAVE_LLVM >> 8) & 0xff,
+			 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+	}
+
+	snprintf(name, name_len, "%s%s", chip_string, llvm_string);
+}
+
+static void
+radv_physical_device_init_mem_types(struct radv_physical_device *device)
+{
+	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
+	uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
+	                                  device->rad_info.vram_vis_size);
+
+	int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+	device->memory_properties.memoryHeapCount = 0;
+	if (device->rad_info.vram_size - visible_vram_size > 0) {
+		vram_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
+			.size = device->rad_info.vram_size - visible_vram_size,
+			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+		};
+	}
+	if (visible_vram_size) {
+		visible_vram_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
+			.size = visible_vram_size,
+			.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+		};
+	}
+	if (device->rad_info.gart_size > 0) {
+		gart_index = device->memory_properties.memoryHeapCount++;
+		device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
+			.size = device->rad_info.gart_size,
+			.flags = 0,
+		};
+	}
+
+	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
+	unsigned type_count = 0;
+	if (vram_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+			.heapIndex = vram_index,
+		};
+	}
+	if (gart_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = gart_index,
+		};
+	}
+	if (visible_vram_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+			.heapIndex = visible_vram_index,
+		};
+	}
+	if (gart_index >= 0) {
+		device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
+		device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
+			.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+			VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+			VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+			.heapIndex = gart_index,
+		};
+	}
+	device->memory_properties.memoryTypeCount = type_count;
 }

 static VkResult
@@ -152,6 +232,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 		goto fail;
 	}

+	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
+
 	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
 		radv_finish_wsi(device);
 		device->ws->destroy(device->ws);
@@ -168,12 +250,11 @@ radv_physical_device_init(struct radv_physical_device *device,
 	/* The gpu id is already embeded in the uuid so we just pass "radv"
 	 * when creating the cache.
 	 */
-	char buf[VK_UUID_SIZE + 1];
-	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE);
-	device->disk_cache = disk_cache_create("radv", buf, shader_env_flags);
+	char buf[VK_UUID_SIZE * 2 + 1];
+	disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
+	device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
-	device->name = get_chip_name(device->rad_info.family);

 	radv_get_driver_uuid(&device->device_uuid);
 	radv_get_device_uuid(&device->rad_info, &device->device_uuid);
@@ -189,6 +270,7 @@ radv_physical_device_init(struct radv_physical_device *device,
 	 */
 	device->has_clear_state = device->rad_info.chip_class >= CIK;

+	radv_physical_device_init_mem_types(device);
 	return VK_SUCCESS;

 fail:
@@ -540,9 +622,9 @@ void radv_GetPhysicalDeviceProperties(
 		.maxPerStageResources                     = max_descriptor_set_size,
 		.maxDescriptorSetSamplers                 = max_descriptor_set_size,
 		.maxDescriptorSetUniformBuffers           = max_descriptor_set_size,
-		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
+		.maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_UNIFORM_BUFFERS,
 		.maxDescriptorSetStorageBuffers           = max_descriptor_set_size,
-		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
+		.maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_STORAGE_BUFFERS,
 		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
 		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
 		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
@@ -779,49 +861,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
 {
 	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

-	STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
-
-	pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-		.heapIndex = RADV_MEM_HEAP_VRAM,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-		.heapIndex = RADV_MEM_HEAP_GTT,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-		VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-		.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
-	};
-	pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
-		.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-		VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-		VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-		.heapIndex = RADV_MEM_HEAP_GTT,
-	};
-
-	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
-	uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
-	                                  physical_device->rad_info.vram_vis_size);
-
-	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.vram_size -
-				visible_vram_size,
-		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-	};
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
-		.size = visible_vram_size,
-		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
-	};
-	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.gart_size,
-		.flags = 0,
-	};
+	*pMemoryProperties = physical_device->memory_properties;
 }

 void radv_GetPhysicalDeviceMemoryProperties2KHR(
@@ -1079,13 +1119,15 @@ VkResult radv_CreateDevice(
 	result = radv_CreatePipelineCache(radv_device_to_handle(device),
 					  &ci, NULL, &pc);
 	if (result != VK_SUCCESS)
-		goto fail;
+		goto fail_meta;

 	device->mem_cache = radv_pipeline_cache_from_handle(pc);

 	*pDevice = radv_device_to_handle(device);
 	return VK_SUCCESS;

+fail_meta:
+	radv_device_finish_meta(device);
 fail:
 	if (device->trace_bo)
 		device->ws->buffer_destroy(device->trace_bo);
@@ -1648,7 +1690,6 @@ radv_get_preamble_cs(struct radv_queue *queue,

 		if (i == 0) {
 			si_cs_emit_cache_flush(cs,
-					       false,
 			                       queue->device->physical_device->rad_info.chip_class,
 					       NULL, 0,
 			                       queue->queue_family_index == RING_COMPUTE &&
@@ -1660,7 +1701,6 @@ radv_get_preamble_cs(struct radv_queue *queue,
 			                       RADV_CMD_FLAG_INV_GLOBAL_L2);
 		} else if (i == 1) {
 			si_cs_emit_cache_flush(cs,
-					       false,
 			                       queue->device->physical_device->rad_info.chip_class,
 					       NULL, 0,
 			                       queue->queue_family_index == RING_COMPUTE &&
@@ -1799,10 +1839,6 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,

 		if (sem->temp_syncobj) {
 			counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
-			if (reset_temp) {
-				/* after we wait on a temp import - drop it */
-				sem->temp_syncobj = 0;
-			}
 		}
 		else if (sem->syncobj)
 			counts->syncobj[syncobj_idx++] = sem->syncobj;
@@ -1823,6 +1859,21 @@ void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
 	free(sem_info->signal.sem);
 }

+/* Destroy the temporary syncobj of each semaphore in sems[0..num_sems) and clear its handle. */
+static void radv_free_temp_syncobjs(struct radv_device *device,
+				    int num_sems,
+				    const VkSemaphore *sems)
+{
+	for (uint32_t i = 0; i < num_sems; i++) {
+		RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
+
+		if (sem->temp_syncobj) { /* only semaphores that currently hold a temporary handle */
+			device->ws->destroy_syncobj(device->ws, sem->temp_syncobj);
+			sem->temp_syncobj = 0; /* reset so the handle is not destroyed twice */
+		}
+	}
+}
+
 VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
 			     int num_wait_sems,
 			     const VkSemaphore *wait_sems,
@@ -1961,6 +2012,9 @@ VkResult radv_QueueSubmit(
 			}
 		}

+		radv_free_temp_syncobjs(queue->device,
+					pSubmits[i].waitSemaphoreCount,
+					pSubmits[i].pWaitSemaphores);
 		radv_free_sem_info(&sem_info);
 		free(cs_array);
 	}
@@ -2059,6 +2113,7 @@ VkResult radv_alloc_memory(VkDevice                        _device,
 	VkResult result;
 	enum radeon_bo_domain domain;
 	uint32_t flags = 0;
+	enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];

 	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

@@ -2101,18 +2156,18 @@ VkResult radv_alloc_memory(VkDevice                        _device,
 	}

 	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
-	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
+	if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
+	    mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
 		domain = RADEON_DOMAIN_GTT;
 	else
 		domain = RADEON_DOMAIN_VRAM;

-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
+	if (mem_type_index == RADV_MEM_TYPE_VRAM)
 		flags |= RADEON_FLAG_NO_CPU_ACCESS;
 	else
 		flags |= RADEON_FLAG_CPU_ACCESS;

-	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
+	if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
 		flags |= RADEON_FLAG_GTT_WC;

 	if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
@@ -2125,7 +2180,7 @@ VkResult radv_alloc_memory(VkDevice                        _device,
 		result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
 		goto fail;
 	}
-	mem->type_index = pAllocateInfo->memoryTypeIndex;
+	mem->type_index = mem_type_index;
 out_success:
 	*pMem = radv_device_memory_to_handle(mem);

@@ -2218,13 +2273,14 @@ VkResult radv_InvalidateMappedMemoryRanges(
 }

 void radv_GetBufferMemoryRequirements(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkBuffer                                    _buffer,
 	VkMemoryRequirements*                       pMemoryRequirements)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

-	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

 	if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
 		pMemoryRequirements->alignment = 4096;
@@ -2258,13 +2314,14 @@ void radv_GetBufferMemoryRequirements2KHR(
 }

 void radv_GetImageMemoryRequirements(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkImage                                     _image,
 	VkMemoryRequirements*                       pMemoryRequirements)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_image, image, _image);

-	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
+	pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

 	pMemoryRequirements->size = image->size;
 	pMemoryRequirements->alignment = image->alignment;
@@ -2528,13 +2585,17 @@ void radv_DestroyFence(
 	vk_free2(&device->alloc, pAllocator, fence);
 }

+/* Return the current CLOCK_MONOTONIC time in nanoseconds. */
+static uint64_t radv_get_current_time(void)
+{
+	struct timespec tv;
+	clock_gettime(CLOCK_MONOTONIC, &tv);
+	return tv.tv_nsec + tv.tv_sec*1000000000ull;
+}
+
 static uint64_t radv_get_absolute_timeout(uint64_t timeout)
 {
-	uint64_t current_time;
-	struct timespec tv;
-
-	clock_gettime(CLOCK_MONOTONIC, &tv);
-	current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;
+	uint64_t current_time = radv_get_current_time();

 	timeout = MIN2(UINT64_MAX - current_time, timeout);

@@ -2552,7 +2613,13 @@ VkResult radv_WaitForFences(
 	timeout = radv_get_absolute_timeout(timeout);

 	if (!waitAll && fenceCount > 1) {
-		fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
+		while(radv_get_current_time() <= timeout) {
+			for (uint32_t i = 0; i < fenceCount; ++i) {
+				if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
+					return VK_SUCCESS;
+			}
+		}
+		return VK_TIMEOUT;
 	}

 	for (uint32_t i = 0; i < fenceCount; ++i) {
@@ -2562,8 +2629,17 @@ VkResult radv_WaitForFences(
 		if (fence->signalled)
 			continue;

-		if (!fence->submitted)
-			return VK_TIMEOUT;
+		if (!fence->submitted) {
+			while(radv_get_current_time() <= timeout && !fence->submitted)
+				/* Do nothing */;
+
+			if (!fence->submitted)
+				return VK_TIMEOUT;
+
+			/* Recheck as it may have been set by submitting operations. */
+			if (fence->signalled)
+				continue;
+		}

 		expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
 		if (!expired)
@@ -3483,6 +3559,7 @@ VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
 	uint32_t syncobj_handle = 0;
+	uint32_t *syncobj_dst = NULL;
 	assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);

 	int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
@@ -3490,10 +3567,15 @@ VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
 		return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;

 	if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
-		sem->temp_syncobj = syncobj_handle;
+		syncobj_dst = &sem->temp_syncobj;
 	} else {
-		sem->syncobj = syncobj_handle;
+		syncobj_dst = &sem->syncobj;
 	}
+
+	if (*syncobj_dst)
+		device->ws->destroy_syncobj(device->ws, *syncobj_dst);
+
+	*syncobj_dst = syncobj_handle;
 	close(pImportSemaphoreFdInfo->fd);
 	return VK_SUCCESS;
 }
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -76,8 +76,7 @@ EXTENSIONS = [
    Extension('VK_KHR_wayland_surface',                   6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
    Extension('VK_KHR_xcb_surface',                       6, 'VK_USE_PLATFORM_XCB_KHR'),
    Extension('VK_KHR_xlib_surface',                      6, 'VK_USE_PLATFORM_XLIB_KHR'),
-    Extension('VK_KHX_multiview',                         1, True),
-    Extension('VK_EXT_debug_report',                      8, True),
+    Extension('VK_KHX_multiview',                         1, False),
    Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
    Extension('VK_AMD_draw_indirect_count',               1, True),
    Extension('VK_AMD_rasterization_order',               1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -1063,6 +1063,9 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph
 	if (format_feature_flags == 0)
 		goto unsupported;

+	if (info->type != VK_IMAGE_TYPE_2D && vk_format_is_depth_or_stencil(info->format))
+		goto unsupported;
+
 	switch (info->type) {
 	default:
 		unreachable("bad vkimage type\n");
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -116,7 +116,8 @@ radv_init_surface(struct radv_device *device,
 		    pCreateInfo->mipLevels <= 1 &&
 		    device->physical_device->rad_info.chip_class >= VI &&
 		    ((pCreateInfo->format == VK_FORMAT_D32_SFLOAT ||
-		      pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT) ||
+		      /* for some reason TC compat with 2/4/8 samples breaks some cts tests - disable for now */
+		      (pCreateInfo->samples < 2 && pCreateInfo->format == VK_FORMAT_D32_SFLOAT_S8_UINT)) ||
 		     (device->physical_device->rad_info.chip_class >= GFX9 &&
 		      pCreateInfo->format == VK_FORMAT_D16_UNORM)))
 			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
@@ -127,7 +128,7 @@ radv_init_surface(struct radv_device *device,

 	surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;

-	bool dcc_compatible_formats = !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
+	bool dcc_compatible_formats = radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable);
 	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
 		const struct  VkImageFormatListCreateInfoKHR *format_list =
 		          (const struct  VkImageFormatListCreateInfoKHR *)
@@ -344,7 +345,7 @@ static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
 	}
 }

-static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
+static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
 {
 	unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

@@ -449,7 +450,7 @@ si_make_texture_descriptor(struct radv_device *device,
 	state[7] = 0;

 	if (device->physical_device->rad_info.chip_class >= GFX9) {
-		unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
+		unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);

 		/* Depth is the the last accessible layer on Gfx9.
 		 * The hw doesn't need to know the total number of layers.
@@ -1047,10 +1048,55 @@ radv_image_view_init(struct radv_image_view *iview,
 	}

 	if (iview->vk_format != image->vk_format) {
-		iview->extent.width = round_up_u32(iview->extent.width * vk_format_get_blockwidth(iview->vk_format),
-						   vk_format_get_blockwidth(image->vk_format));
-		iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
-						    vk_format_get_blockheight(image->vk_format));
+		unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
+		unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
+		unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
+		unsigned img_bh = vk_format_get_blockheight(image->vk_format);
+
+		iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
+		iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
+
+		/* Comment ported from amdvlk -
+		 * If we have the following image:
+		 *              Uncompressed pixels   Compressed block sizes (4x4)
+		 *      mip0:       22 x 22                   6 x 6
+		 *      mip1:       11 x 11                   3 x 3
+		 *      mip2:        5 x  5                   2 x 2
+		 *      mip3:        2 x  2                   1 x 1
+		 *      mip4:        1 x  1                   1 x 1
+		 *
+		 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
+		 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
+		 * divide-by-two integer math):
+		 *      mip0:  6x6
+		 *      mip1:  3x3
+		 *      mip2:  1x1
+		 *      mip3:  1x1
+		 *
+		 * This means that mip2 will be missing texels.
+		 *
+		 * Fix this by calculating the base mip's width and height, then convert that, and round it
+		 * back up to get the level 0 size.
+		 * Clamp the converted size between the original values, and next power of two, which
+		 * means we don't oversize the image.
+		 */
+		 if (device->physical_device->rad_info.chip_class >= GFX9 &&
+		     vk_format_is_compressed(image->vk_format) &&
+		     !vk_format_is_compressed(iview->vk_format)) {
+			 unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
+			 unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
+			 unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
+			 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
+
+			 lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
+			 lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
+
+			 lvl_width <<= range->baseMipLevel;
+			 lvl_height <<= range->baseMipLevel;
+
+			 iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
+			 iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
+		 }
 	}

 	iview->base_layer = range->baseArrayLayer;
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -377,9 +377,9 @@ fail_resolve_fragment:
 fail_resolve_compute:
 	radv_device_finish_meta_fast_clear_flush_state(device);
 fail_fast_clear:
-	radv_device_finish_meta_buffer_state(device);
-fail_query:
 	radv_device_finish_meta_query_state(device);
+fail_query:
+	radv_device_finish_meta_buffer_state(device);
 fail_buffer:
 	radv_device_finish_meta_depth_decomp_state(device);
 fail_depth_decomp:
@@ -533,7 +533,7 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,
 		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
 		nir_builder_instr_insert(b, &tex_all_same->instr);

-		nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
+		nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
 		nir_if *if_stmt = nir_if_create(b->shader);
 		if_stmt->condition = nir_src_for_ssa(all_same);
 		nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -109,6 +109,7 @@ struct radv_meta_blit2d_surf {
 	unsigned level;
 	unsigned layer;
 	VkImageAspectFlags aspect_mask;
+	VkImageLayout current_layout;
 };

 struct radv_meta_blit2d_buffer {
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -265,12 +265,14 @@ static void
 meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               struct radv_image *src_image,
               struct radv_image_view *src_iview,
+	       VkImageLayout src_image_layout,
               VkOffset3D src_offset_0,
               VkOffset3D src_offset_1,
               struct radv_image *dest_image,
               struct radv_image_view *dest_iview,
-               VkOffset3D dest_offset_0,
-               VkOffset3D dest_offset_1,
+	       VkImageLayout dest_image_layout,
+               VkOffset2D dest_offset_0,
+               VkOffset2D dest_offset_1,
               VkRect2D dest_box,
               VkFilter blit_filter)
 {
@@ -351,11 +353,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 		}
 		break;
 	}
-	case VK_IMAGE_ASPECT_DEPTH_BIT:
+	case VK_IMAGE_ASPECT_DEPTH_BIT: {
+		enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
 		radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 					      &(VkRenderPassBeginInfo) {
 						      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-							      .renderPass = device->meta_state.blit.depth_only_rp,
+							      .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
 							      .framebuffer = fb,
 							      .renderArea = {
 							      .offset = { dest_box.offset.x, dest_box.offset.y },
@@ -378,11 +381,13 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 			unreachable(!"bad VkImageType");
 		}
 		break;
-	case VK_IMAGE_ASPECT_STENCIL_BIT:
+	}
+	case VK_IMAGE_ASPECT_STENCIL_BIT: {
+		enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
 		radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 					      &(VkRenderPassBeginInfo) {
 						      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-							      .renderPass = device->meta_state.blit.stencil_only_rp,
+							      .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
 							      .framebuffer = fb,
 							      .renderArea = {
 							      .offset = { dest_box.offset.x, dest_box.offset.y },
@@ -405,6 +410,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 			unreachable(!"bad VkImageType");
 		}
 		break;
+	}
 	default:
 		unreachable(!"bad VkImageType");
 	}
@@ -518,21 +524,6 @@ void radv_CmdBlitImage(
 	for (unsigned r = 0; r < regionCount; r++) {
 		const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
 		const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
-		struct radv_image_view src_iview;
-		radv_image_view_init(&src_iview, cmd_buffer->device,
-				     &(VkImageViewCreateInfo) {
-					     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-						     .image = srcImage,
-						     .viewType = radv_meta_get_view_type(src_image),
-						     .format = src_image->vk_format,
-						     .subresourceRange = {
-						     .aspectMask = src_res->aspectMask,
-						     .baseMipLevel = src_res->mipLevel,
-						     .levelCount = 1,
-						     .baseArrayLayer = src_res->baseArrayLayer,
-						     .layerCount = 1
-					     },
-				     });

 		unsigned dst_start, dst_end;
 		if (dest_image->type == VK_IMAGE_TYPE_3D) {
@@ -579,18 +570,17 @@ void radv_CmdBlitImage(
 		dest_box.extent.width = abs(dst_x1 - dst_x0);
 		dest_box.extent.height = abs(dst_y1 - dst_y0);

-		struct radv_image_view dest_iview;
 		const unsigned num_layers = dst_end - dst_start;
 		for (unsigned i = 0; i < num_layers; i++) {
-			const VkOffset3D dest_offset_0 = {
+			struct radv_image_view dest_iview, src_iview;
+
+			const VkOffset2D dest_offset_0 = {
 				.x = dst_x0,
 				.y = dst_y0,
-				.z = dst_start + i ,
 			};
-			const VkOffset3D dest_offset_1 = {
+			const VkOffset2D dest_offset_1 = {
 				.x = dst_x1,
 				.y = dst_y1,
-				.z = dst_start + i ,
 			};
 			VkOffset3D src_offset_0 = {
 				.x = src_x0,
@@ -602,9 +592,10 @@ void radv_CmdBlitImage(
 				.y = src_y1,
 				.z = src_start + i * src_z_step,
 			};
-			const uint32_t dest_array_slice =
-				radv_meta_get_iview_layer(dest_image, dst_res,
-							  &dest_offset_0);
+			const uint32_t dest_array_slice = dst_start + i;
+
+			/* 3D images have just 1 layer */
+			const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;

 			radv_image_view_init(&dest_iview, cmd_buffer->device,
 					     &(VkImageViewCreateInfo) {
@@ -620,10 +611,24 @@ void radv_CmdBlitImage(
 							     .layerCount = 1
 						     },
 					     });
+			radv_image_view_init(&src_iview, cmd_buffer->device,
+					     &(VkImageViewCreateInfo) {
+						.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+							.image = srcImage,
+							.viewType = radv_meta_get_view_type(src_image),
+							.format = src_image->vk_format,
+							.subresourceRange = {
+							.aspectMask = src_res->aspectMask,
+							.baseMipLevel = src_res->mipLevel,
+							.levelCount = 1,
+							.baseArrayLayer = src_array_slice,
+							.layerCount = 1
+						},
+					});
 			meta_emit_blit(cmd_buffer,
-				       src_image, &src_iview,
+				       src_image, &src_iview, srcImageLayout,
 				       src_offset_0, src_offset_1,
-				       dest_image, &dest_iview,
+				       dest_image, &dest_iview, destImageLayout,
 				       dest_offset_0, dest_offset_1,
 				       dest_box,
 				       filter);
@@ -653,8 +658,13 @@ radv_device_finish_meta_blit_state(struct radv_device *device)
 				     &state->alloc);
 	}

-	radv_DestroyRenderPass(radv_device_to_handle(device),
-			       state->blit.depth_only_rp, &state->alloc);
+	for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT; i++) {
+		radv_DestroyRenderPass(radv_device_to_handle(device),
+				       state->blit.depth_only_rp[i], &state->alloc);
+		radv_DestroyRenderPass(radv_device_to_handle(device),
+				       state->blit.stencil_only_rp[i], &state->alloc);
+	}
+
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->blit.depth_only_1d_pipeline, &state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
@@ -662,8 +672,6 @@ radv_device_finish_meta_blit_state(struct radv_device *device)
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->blit.depth_only_3d_pipeline, &state->alloc);

-	radv_DestroyRenderPass(radv_device_to_handle(device),
-			       state->blit.stencil_only_rp, &state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->blit.stencil_only_1d_pipeline,
 			     &state->alloc);
@@ -674,6 +682,7 @@ radv_device_finish_meta_blit_state(struct radv_device *device)
 			     state->blit.stencil_only_3d_pipeline,
 			     &state->alloc);

+
 	radv_DestroyPipelineLayout(radv_device_to_handle(device),
 				   state->blit.pipeline_layout, &state->alloc);
 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
@@ -867,35 +876,38 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
 	fs_2d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_2D);
 	fs_3d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_3D);

-	result = radv_CreateRenderPass(radv_device_to_handle(device),
-				       &(VkRenderPassCreateInfo) {
-					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+		VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+		result = radv_CreateRenderPass(radv_device_to_handle(device),
+					       &(VkRenderPassCreateInfo) {
+						       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 						       .attachmentCount = 1,
 						       .pAttachments = &(VkAttachmentDescription) {
-						       .format = VK_FORMAT_D32_SFLOAT,
-						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-					       },
-						       .subpassCount = 1,
-						.pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 0,
-						       .pColorAttachments = NULL,
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+							       .format = VK_FORMAT_D32_SFLOAT,
+							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+							       .initialLayout = layout,
+							       .finalLayout = layout,
 						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-						.dependencyCount = 0,
-					 }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp);
-	if (result != VK_SUCCESS)
-		goto fail;
+						       .subpassCount = 1,
+						       .pSubpasses = &(VkSubpassDescription) {
+							       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+							       .inputAttachmentCount = 0,
+							       .colorAttachmentCount = 0,
+							       .pColorAttachments = NULL,
+							       .pResolveAttachments = NULL,
+							       .pDepthStencilAttachment = &(VkAttachmentReference) {
+								       .attachment = 0,
+								       .layout = layout,
+								},
+							       .preserveAttachmentCount = 1,
+							       .pPreserveAttachments = (uint32_t[]) { 0 },
+							},
+						        .dependencyCount = 0,
+						}, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
+		if (result != VK_SUCCESS)
+			goto fail;
+	}

 	VkPipelineVertexInputStateCreateInfo vi_create_info = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
@@ -975,7 +987,7 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
 		},
 		.flags = 0,
 		.layout = device->meta_state.blit.pipeline_layout,
-		.renderPass = device->meta_state.blit.depth_only_rp,
+		.renderPass = device->meta_state.blit.depth_only_rp[0],
 		.subpass = 0,
 	};

@@ -1025,33 +1037,39 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
 	fs_2d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_2D);
 	fs_3d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_3D);
 
-	result = radv_CreateRenderPass(radv_device_to_handle(device),
-				       &(VkRenderPassCreateInfo) {
-					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+		VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+		result = radv_CreateRenderPass(radv_device_to_handle(device),
+					       &(VkRenderPassCreateInfo) {
+						       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 						       .attachmentCount = 1,
 						       .pAttachments = &(VkAttachmentDescription) {
-						       .format = VK_FORMAT_S8_UINT,
-						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-					       },
-						       .subpassCount = 1,
-						.pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 0,
-						       .pColorAttachments = NULL,
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+							       .format = VK_FORMAT_S8_UINT,
+							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+							       .initialLayout = layout,
+							       .finalLayout = layout,
 						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-						.dependencyCount = 0,
-					 }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp);
+						       .subpassCount = 1,
+						       .pSubpasses = &(VkSubpassDescription) {
+							       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+							       .inputAttachmentCount = 0,
+							       .colorAttachmentCount = 0,
+							       .pColorAttachments = NULL,
+							       .pResolveAttachments = NULL,
+							       .pDepthStencilAttachment = &(VkAttachmentReference) {
+								       .attachment = 0,
+								       .layout = layout,
+							       },
+							       .preserveAttachmentCount = 1,
+							       .pPreserveAttachments = (uint32_t[]) { 0 },
+						       },
+						       .dependencyCount = 0,
+					 }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
+		/* Check each pass: a later success must not mask an earlier failure. */
+		if (result != VK_SUCCESS)
+			goto fail;
+	}
 	if (result != VK_SUCCESS)
 		goto fail;

@@ -1135,7 +1150,6 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
 			},
 			.depthCompareOp = VK_COMPARE_OP_ALWAYS,
 		},
-
 		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 			.dynamicStateCount = 6,
@@ -1150,7 +1164,7 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
 		},
 		.flags = 0,
 		.layout = device->meta_state.blit.pipeline_layout,
-		.renderPass = device->meta_state.blit.stencil_only_rp,
+		.renderPass = device->meta_state.blit.stencil_only_rp[0],
 		.subpass = 0,
 	};

@@ -1182,6 +1196,7 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
 	if (result != VK_SUCCESS)
 		goto fail;

+
 fail:
 	ralloc_free(fs_1d.nir);
 	ralloc_free(fs_2d.nir);
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -30,6 +30,7 @@

 enum blit2d_src_type {
 	BLIT2D_SRC_TYPE_IMAGE,
+	BLIT2D_SRC_TYPE_IMAGE_3D,
 	BLIT2D_SRC_TYPE_BUFFER,
 	BLIT2D_NUM_SRC_TYPES,
 };
@@ -41,6 +42,8 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
              VkImageAspectFlagBits aspects)
 {
 	VkFormat format;
+	VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
+		radv_meta_get_view_type(surf->image);

 	if (depth_format)
 		format = depth_format;
@@ -51,7 +54,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
 			     &(VkImageViewCreateInfo) {
 				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 					     .image = radv_image_to_handle(surf->image),
-					     .viewType = VK_IMAGE_VIEW_TYPE_2D,
+					     .viewType = view_type,
 					     .format = format,
 					     .subresourceRange = {
 					     .aspectMask = aspects,
@@ -126,6 +129,12 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
 	} else {
 		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);

+		if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
+			radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+					      device->meta_state.blit2d.p_layouts[src_type],
+					      VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
+					      &src_img->layer);
+
 		radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
 					      device->meta_state.blit2d.p_layouts[src_type],
 					      0, /* set */
@@ -269,10 +278,11 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,

 				bind_pipeline(cmd_buffer, src_type, fs_key);
 			} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+				enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
 				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 							&(VkRenderPassBeginInfo) {
 								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-									.renderPass = device->meta_state.blit2d.depth_only_rp,
+									.renderPass = device->meta_state.blit2d.depth_only_rp[ds_layout],
 									.framebuffer = dst_temps.fb,
 									.renderArea = {
 									.offset = { rects[r].dst_x, rects[r].dst_y, },
@@ -286,10 +296,11 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 				bind_depth_pipeline(cmd_buffer, src_type);

 			} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+				enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
 				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 							&(VkRenderPassBeginInfo) {
 								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-									.renderPass = device->meta_state.blit2d.stencil_only_rp,
+									.renderPass = device->meta_state.blit2d.stencil_only_rp[ds_layout],
 									.framebuffer = dst_temps.fb,
 									.renderArea = {
 									.offset = { rects[r].dst_x, rects[r].dst_y, },
@@ -341,8 +352,10 @@ radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
 		 unsigned num_rects,
 		 struct radv_meta_blit2d_rect *rects)
 {
+	bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+		(src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
 	enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER :
-						  BLIT2D_SRC_TYPE_IMAGE;
+		use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D : BLIT2D_SRC_TYPE_IMAGE;
 	radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst,
 				    num_rects, rects, src_type);
 }
@@ -407,29 +420,46 @@ build_nir_vertex_shader(void)

 typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
                                               struct radv_device *,
-                                               nir_ssa_def *);
+                                               nir_ssa_def *, bool);

 static nir_ssa_def *
 build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,
-                      nir_ssa_def *tex_pos)
+                      nir_ssa_def *tex_pos, bool is_3d)
 {
+	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
 	const struct glsl_type *sampler_type =
-		glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_UINT);
+		glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
 	nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
 						    sampler_type, "s_tex");
 	sampler->data.descriptor_set = 0;
 	sampler->data.binding = 0;

+	nir_ssa_def *tex_pos_3d = NULL;
+	if (is_3d) {
+		nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+		nir_intrinsic_set_base(layer, 16);
+		nir_intrinsic_set_range(layer, 4);
+		layer->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+		layer->num_components = 1;
+		nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
+		nir_builder_instr_insert(b, &layer->instr);
+
+		nir_ssa_def *chans[3];
+		chans[0] = nir_channel(b, tex_pos, 0);
+		chans[1] = nir_channel(b, tex_pos, 1);
+		chans[2] = &layer->dest.ssa;
+		tex_pos_3d = nir_vec(b, chans, 3);
+	}
 	nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
-	tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+	tex->sampler_dim = dim;
 	tex->op = nir_texop_txf;
 	tex->src[0].src_type = nir_tex_src_coord;
-	tex->src[0].src = nir_src_for_ssa(tex_pos);
+	tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
 	tex->src[1].src_type = nir_tex_src_lod;
 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
 	tex->dest_type = nir_type_uint;
 	tex->is_array = false;
-	tex->coord_components = 2;
+	tex->coord_components = is_3d ? 3 : 2;
 	tex->texture = nir_deref_var_create(tex, sampler);
 	tex->sampler = NULL;

@@ -442,7 +472,7 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,

 static nir_ssa_def *
 build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
-                      nir_ssa_def *tex_pos)
+		       nir_ssa_def *tex_pos, bool is_3d)
 {
 	const struct glsl_type *sampler_type =
 		glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
@@ -490,7 +520,7 @@ static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {

 static nir_shader *
 build_nir_copy_fragment_shader(struct radv_device *device,
-                               texel_fetch_build_func txf_func, const char* name)
+                               texel_fetch_build_func txf_func, const char* name, bool is_3d)
 {
 	const struct glsl_type *vec4 = glsl_vec4_type();
 	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
@@ -511,7 +541,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
 	unsigned swiz[4] = { 0, 1 };
 	nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);

-	nir_ssa_def *color = txf_func(&b, device, tex_pos);
+	nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d);
 	nir_store_var(&b, color_out, color, 0xf);

 	return b.shader;
@@ -519,7 +549,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,

 static nir_shader *
 build_nir_copy_fragment_shader_depth(struct radv_device *device,
-				     texel_fetch_build_func txf_func, const char* name)
+				     texel_fetch_build_func txf_func, const char* name, bool is_3d)
 {
 	const struct glsl_type *vec4 = glsl_vec4_type();
 	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
@@ -540,7 +570,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
 	unsigned swiz[4] = { 0, 1 };
 	nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);

-	nir_ssa_def *color = txf_func(&b, device, tex_pos);
+	nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d);
 	nir_store_var(&b, color_out, color, 0x1);

 	return b.shader;
@@ -548,7 +578,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,

 static nir_shader *
 build_nir_copy_fragment_shader_stencil(struct radv_device *device,
-				       texel_fetch_build_func txf_func, const char* name)
+				       texel_fetch_build_func txf_func, const char* name, bool is_3d)
 {
 	const struct glsl_type *vec4 = glsl_vec4_type();
 	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
@@ -569,7 +599,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
 	unsigned swiz[4] = { 0, 1 };
 	nir_ssa_def *tex_pos = nir_swizzle(&b, pos_int, swiz, 2, false);

-	nir_ssa_def *color = txf_func(&b, device, tex_pos);
+	nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d);
 	nir_store_var(&b, color_out, color, 0x1);

 	return b.shader;
@@ -586,10 +616,12 @@ radv_device_finish_meta_blit2d_state(struct radv_device *device)
 				       &state->alloc);
 	}

-	radv_DestroyRenderPass(radv_device_to_handle(device),
-			       state->blit2d.depth_only_rp, &state->alloc);
-	radv_DestroyRenderPass(radv_device_to_handle(device),
-			       state->blit2d.stencil_only_rp, &state->alloc);
+	for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT; j++) {
+		radv_DestroyRenderPass(radv_device_to_handle(device),
+				       state->blit2d.depth_only_rp[j], &state->alloc);
+		radv_DestroyRenderPass(radv_device_to_handle(device),
+				       state->blit2d.stencil_only_rp[j], &state->alloc);
+	}

 	for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
 		radv_DestroyPipelineLayout(radv_device_to_handle(device),
@@ -629,6 +661,10 @@ blit2d_init_color_pipeline(struct radv_device *device,
 		src_func = build_nir_texel_fetch;
 		name = "meta_blit2d_image_fs";
 		break;
+	case BLIT2D_SRC_TYPE_IMAGE_3D:
+		src_func = build_nir_texel_fetch;
+		name = "meta_blit3d_image_fs";
+		break;
 	case BLIT2D_SRC_TYPE_BUFFER:
 		src_func = build_nir_buffer_fetch;
 		name = "meta_blit2d_buffer_fs";
@@ -642,7 +678,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
 	struct radv_shader_module fs = { .nir = NULL };


-	fs.nir = build_nir_copy_fragment_shader(device, src_func, name);
+	fs.nir = build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D);
 	vi_create_info = &normal_vi_create_info;

 	struct radv_shader_module vs = {
@@ -788,6 +824,10 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 		src_func = build_nir_texel_fetch;
 		name = "meta_blit2d_depth_image_fs";
 		break;
+	case BLIT2D_SRC_TYPE_IMAGE_3D:
+		src_func = build_nir_texel_fetch;
+		name = "meta_blit3d_depth_image_fs";
+		break;
 	case BLIT2D_SRC_TYPE_BUFFER:
 		src_func = build_nir_buffer_fetch;
 		name = "meta_blit2d_depth_buffer_fs";
@@ -800,7 +840,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
 	struct radv_shader_module fs = { .nir = NULL };

-	fs.nir = build_nir_copy_fragment_shader_depth(device, src_func, name);
+	fs.nir = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D);
 	vi_create_info = &normal_vi_create_info;

 	struct radv_shader_module vs = {
@@ -823,34 +863,37 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 		},
 	};

-	if (!device->meta_state.blit2d.depth_only_rp) {
-		result = radv_CreateRenderPass(radv_device_to_handle(device),
-					       &(VkRenderPassCreateInfo) {
-						       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+		if (!device->meta_state.blit2d.depth_only_rp[ds_layout]) {
+			VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+			result = radv_CreateRenderPass(radv_device_to_handle(device),
+						       &(VkRenderPassCreateInfo) {
+							       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 							       .attachmentCount = 1,
 							       .pAttachments = &(VkAttachmentDescription) {
-							       .format = VK_FORMAT_D32_SFLOAT,
-							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-							       .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-							       .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-						       },
-						       .subpassCount = 1,
-						       .pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 0,
-						       .pColorAttachments = NULL,
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-								.dependencyCount = 0,
-						 }, &device->meta_state.alloc, &device->meta_state.blit2d.depth_only_rp);
+								       .format = VK_FORMAT_D32_SFLOAT,
+								       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+								       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+								       .initialLayout = layout,
+								       .finalLayout = layout,
+							       },
+							       .subpassCount = 1,
+							       .pSubpasses = &(VkSubpassDescription) {
+								       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+								       .inputAttachmentCount = 0,
+								       .colorAttachmentCount = 0,
+								       .pColorAttachments = NULL,
+								       .pResolveAttachments = NULL,
+								       .pDepthStencilAttachment = &(VkAttachmentReference) {
+									       .attachment = 0,
+									       .layout = layout,
+								       },
+								       .preserveAttachmentCount = 1,
+								       .pPreserveAttachments = (uint32_t[]) { 0 },
+							       },
+							       .dependencyCount = 0,
+							}, &device->meta_state.alloc, &device->meta_state.blit2d.depth_only_rp[ds_layout]);
+		}
 	}

 	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
@@ -909,7 +952,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 		},
 		.flags = 0,
 		.layout = device->meta_state.blit2d.p_layouts[src_type],
-		.renderPass = device->meta_state.blit2d.depth_only_rp,
+		.renderPass = device->meta_state.blit2d.depth_only_rp[0],
 		.subpass = 0,
 	};

@@ -943,6 +986,10 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 		src_func = build_nir_texel_fetch;
 		name = "meta_blit2d_stencil_image_fs";
 		break;
+	case BLIT2D_SRC_TYPE_IMAGE_3D:
+		src_func = build_nir_texel_fetch;
+		name = "meta_blit3d_stencil_image_fs";
+		break;
 	case BLIT2D_SRC_TYPE_BUFFER:
 		src_func = build_nir_buffer_fetch;
 		name = "meta_blit2d_stencil_buffer_fs";
@@ -955,7 +1002,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
 	struct radv_shader_module fs = { .nir = NULL };

-	fs.nir = build_nir_copy_fragment_shader_stencil(device, src_func, name);
+	fs.nir = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D);
 	vi_create_info = &normal_vi_create_info;

 	struct radv_shader_module vs = {
@@ -978,34 +1025,37 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 		},
 	};

-	if (!device->meta_state.blit2d.stencil_only_rp) {
-		result = radv_CreateRenderPass(radv_device_to_handle(device),
-					       &(VkRenderPassCreateInfo) {
-						       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+		if (!device->meta_state.blit2d.stencil_only_rp[ds_layout]) {
+			VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+			result = radv_CreateRenderPass(radv_device_to_handle(device),
+						       &(VkRenderPassCreateInfo) {
+							       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 							       .attachmentCount = 1,
 							       .pAttachments = &(VkAttachmentDescription) {
-							       .format = VK_FORMAT_S8_UINT,
-							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-							       .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-							       .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-						       },
-						       .subpassCount = 1,
-						       .pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 0,
-						       .pColorAttachments = NULL,
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-								.dependencyCount = 0,
-						 }, &device->meta_state.alloc, &device->meta_state.blit2d.stencil_only_rp);
+								       .format = VK_FORMAT_S8_UINT,
+								       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+								       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+								       .initialLayout = layout,
+								       .finalLayout = layout,
+							       },
+							       .subpassCount = 1,
+							       .pSubpasses = &(VkSubpassDescription) {
+								       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+								       .inputAttachmentCount = 0,
+								       .colorAttachmentCount = 0,
+								       .pColorAttachments = NULL,
+								       .pResolveAttachments = NULL,
+								       .pDepthStencilAttachment = &(VkAttachmentReference) {
+									       .attachment = 0,
+									       .layout = layout,
+								       },
+								       .preserveAttachmentCount = 1,
+								       .pPreserveAttachments = (uint32_t[]) { 0 },
+							       },
+							       .dependencyCount = 0,
+						       }, &device->meta_state.alloc, &device->meta_state.blit2d.stencil_only_rp[ds_layout]);
+		}
 	}

 	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
@@ -1080,7 +1130,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 		},
 		.flags = 0,
 		.layout = device->meta_state.blit2d.p_layouts[src_type],
-		.renderPass = device->meta_state.blit2d.stencil_only_rp,
+		.renderPass = device->meta_state.blit2d.stencil_only_rp[0],
 		.subpass = 0,
 	};

@@ -1120,6 +1170,7 @@ VkResult
 radv_device_init_meta_blit2d_state(struct radv_device *device)
 {
 	VkResult result;
+	bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;

 	const VkPushConstantRange push_constant_ranges[] = {
 		{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
@@ -1155,6 +1206,37 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

+	if (create_3d) {
+		result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
+							&(VkDescriptorSetLayoutCreateInfo) {
+								.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+									.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+									.bindingCount = 1,
+									.pBindings = (VkDescriptorSetLayoutBinding[]) {
+									{
+										.binding = 0,
+										.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+										.descriptorCount = 1,
+										.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+										.pImmutableSamplers = NULL
+									},
+								}
+							}, &device->meta_state.alloc, &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_IMAGE_3D]);
+		if (result != VK_SUCCESS)
+			goto fail;
+		result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+						   &(VkPipelineLayoutCreateInfo) {
+							   .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+								   .setLayoutCount = 1,
+								   .pSetLayouts = &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_IMAGE_3D],
+								   .pushConstantRangeCount = 2,
+								   .pPushConstantRanges = push_constant_ranges,
+								   },
+						   &device->meta_state.alloc, &device->meta_state.blit2d.p_layouts[BLIT2D_SRC_TYPE_IMAGE_3D]);
+		if (result != VK_SUCCESS)
+			goto fail;
+	}
+
 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
 						&(VkDescriptorSetLayoutCreateInfo) {
 							.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -1187,6 +1269,8 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
 		goto fail;

 	for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+		if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
+			continue;
 		for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
 			result = blit2d_init_color_pipeline(device, src, pipeline_formats[j]);
 			if (result != VK_SUCCESS)
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -29,11 +29,15 @@
 * Compute queue: implementation also of buffer->image, image->image, and image clear.
 */

+/* GFX9 needs to use a 3D sampler to access 3D resources, so this shader
+ * builder takes an is_3d flag that selects a 3D sampler instead of a 2D one.
+ */
 static nir_shader *
-build_nir_itob_compute_shader(struct radv_device *dev)
+build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
 {
 	nir_builder b;
-	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+	const struct glsl_type *sampler_type = glsl_sampler_type(dim,
 								 false,
 								 false,
 								 GLSL_TYPE_FLOAT);
@@ -42,7 +46,7 @@ build_nir_itob_compute_shader(struct radv_device *dev)
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs");
+	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
 	b.shader->info.cs.local_size[0] = 16;
 	b.shader->info.cs.local_size[1] = 16;
 	b.shader->info.cs.local_size[2] = 1;
@@ -69,32 +73,31 @@ build_nir_itob_compute_shader(struct radv_device *dev)

 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(offset, 0);
-	nir_intrinsic_set_range(offset, 12);
+	nir_intrinsic_set_range(offset, 16);
 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
-	offset->num_components = 2;
-	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
+	offset->num_components = is_3d ? 3 : 2;
+	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
 	nir_builder_instr_insert(&b, &offset->instr);

 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(stride, 0);
-	nir_intrinsic_set_range(stride, 12);
-	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	nir_intrinsic_set_range(stride, 16);
+	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
 	stride->num_components = 1;
 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
 	nir_builder_instr_insert(&b, &stride->instr);

 	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
-
 	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
-	tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+	tex->sampler_dim = dim;
 	tex->op = nir_texop_txf;
 	tex->src[0].src_type = nir_tex_src_coord;
-	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, 0x3));
+	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
 	tex->src[1].src_type = nir_tex_src_lod;
 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
 	tex->dest_type = nir_type_float;
 	tex->is_array = false;
-	tex->coord_components = 2;
+	tex->coord_components = is_3d ? 3 : 2;
 	tex->texture = nir_deref_var_create(tex, input_img);
 	tex->sampler = NULL;

@@ -126,8 +129,11 @@ radv_device_init_meta_itob_state(struct radv_device *device)
 {
 	VkResult result;
 	struct radv_shader_module cs = { .nir = NULL };
+	struct radv_shader_module cs_3d = { .nir = NULL };

-	cs.nir = build_nir_itob_compute_shader(device);
+	cs.nir = build_nir_itob_compute_shader(device, false);
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		cs_3d.nir = build_nir_itob_compute_shader(device, true);

 	/*
 	 * two descriptors one for the image being sampled
@@ -168,7 +174,7 @@ radv_device_init_meta_itob_state(struct radv_device *device)
 		.setLayoutCount = 1,
 		.pSetLayouts = &device->meta_state.itob.img_ds_layout,
 		.pushConstantRangeCount = 1,
-		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
 	};

 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -202,10 +208,36 @@ radv_device_init_meta_itob_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+			.module = radv_shader_module_to_handle(&cs_3d),
+			.pName = "main",
+			.pSpecializationInfo = NULL,
+		};
+
+		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+			.stage = pipeline_shader_stage_3d,
+			.flags = 0,
+			.layout = device->meta_state.itob.img_p_layout,
+		};
+
+		result = radv_CreateComputePipelines(radv_device_to_handle(device),
+						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+						     1, &vk_pipeline_info_3d, NULL,
+						     &device->meta_state.itob.pipeline_3d);
+		if (result != VK_SUCCESS)
+			goto fail;
+		ralloc_free(cs_3d.nir);
+	}
 	ralloc_free(cs.nir);
+
 	return VK_SUCCESS;
 fail:
 	ralloc_free(cs.nir);
+	ralloc_free(cs_3d.nir);
 	return result;
 }

@@ -221,22 +253,26 @@ radv_device_finish_meta_itob_state(struct radv_device *device)
 					&state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->itob.pipeline, &state->alloc);
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->itob.pipeline_3d, &state->alloc);
 }

 static nir_shader *
-build_nir_btoi_compute_shader(struct radv_device *dev)
+build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
 {
 	nir_builder b;
+	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
 	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
 							     false,
 							     false,
 							     GLSL_TYPE_FLOAT);
-	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+	const struct glsl_type *img_type = glsl_sampler_type(dim,
 							     false,
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
+	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
 	b.shader->info.cs.local_size[0] = 16;
 	b.shader->info.cs.local_size[1] = 16;
 	b.shader->info.cs.local_size[2] = 1;
@@ -261,16 +297,16 @@ build_nir_btoi_compute_shader(struct radv_device *dev)

 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(offset, 0);
-	nir_intrinsic_set_range(offset, 12);
+	nir_intrinsic_set_range(offset, 16);
 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
-	offset->num_components = 2;
-	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
+	offset->num_components = is_3d ? 3 : 2;
+	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
 	nir_builder_instr_insert(&b, &offset->instr);

 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(stride, 0);
-	nir_intrinsic_set_range(stride, 12);
-	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+	nir_intrinsic_set_range(stride, 16);
+	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
 	stride->num_components = 1;
 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
 	nir_builder_instr_insert(&b, &stride->instr);
@@ -318,9 +354,10 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
 {
 	VkResult result;
 	struct radv_shader_module cs = { .nir = NULL };
-
-	cs.nir = build_nir_btoi_compute_shader(device);
-
+	struct radv_shader_module cs_3d = { .nir = NULL };
+	cs.nir = build_nir_btoi_compute_shader(device, false);
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		cs_3d.nir = build_nir_btoi_compute_shader(device, true);
 	/*
 	 * two descriptors one for the image being sampled
 	 * one for the buffer being written.
@@ -360,7 +397,7 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
 		.setLayoutCount = 1,
 		.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
 		.pushConstantRangeCount = 1,
-		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
 	};

 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -394,9 +431,36 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;
 
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+			.module = radv_shader_module_to_handle(&cs_3d),
+			.pName = "main",
+			.pSpecializationInfo = NULL,
+		};
+
+		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+			.stage = pipeline_shader_stage_3d,
+			.flags = 0,
+			.layout = device->meta_state.btoi.img_p_layout,
+		};
+
+		result = radv_CreateComputePipelines(radv_device_to_handle(device),
+						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+						     1, &vk_pipeline_info_3d, NULL,
+						     &device->meta_state.btoi.pipeline_3d);
+		/* Report a failed 3D pipeline creation; fail: frees both NIR shaders. */
+		if (result != VK_SUCCESS)
+			goto fail;
+		ralloc_free(cs_3d.nir);
+	}
 	ralloc_free(cs.nir);
+
 	return VK_SUCCESS;
 fail:
+	ralloc_free(cs_3d.nir);
 	ralloc_free(cs.nir);
 	return result;
 }
@@ -413,22 +474,25 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
 					&state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->btoi.pipeline, &state->alloc);
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->btoi.pipeline_3d, &state->alloc);
 }

 static nir_shader *
-build_nir_itoi_compute_shader(struct radv_device *dev)
+build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
 {
 	nir_builder b;
-	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+	const struct glsl_type *buf_type = glsl_sampler_type(dim,
 							     false,
 							     false,
 							     GLSL_TYPE_FLOAT);
-	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+	const struct glsl_type *img_type = glsl_sampler_type(dim,
 							     false,
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
+	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
 	b.shader->info.cs.local_size[0] = 16;
 	b.shader->info.cs.local_size[1] = 16;
 	b.shader->info.cs.local_size[2] = 1;
@@ -453,18 +517,18 @@ build_nir_itoi_compute_shader(struct radv_device *dev)

 	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(src_offset, 0);
-	nir_intrinsic_set_range(src_offset, 16);
+	nir_intrinsic_set_range(src_offset, 24);
 	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
-	src_offset->num_components = 2;
-	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
+	src_offset->num_components = is_3d ? 3 : 2;
+	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
 	nir_builder_instr_insert(&b, &src_offset->instr);

 	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(dst_offset, 0);
-	nir_intrinsic_set_range(dst_offset, 16);
-	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
-	dst_offset->num_components = 2;
-	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
+	nir_intrinsic_set_range(dst_offset, 24);
+	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
+	dst_offset->num_components = is_3d ? 3 : 2;
+	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
 	nir_builder_instr_insert(&b, &dst_offset->instr);

 	nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
@@ -472,15 +536,15 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
 	nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

 	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
-	tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+	tex->sampler_dim = dim;
 	tex->op = nir_texop_txf;
 	tex->src[0].src_type = nir_tex_src_coord;
-	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3));
+	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
 	tex->src[1].src_type = nir_tex_src_lod;
 	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
 	tex->dest_type = nir_type_float;
 	tex->is_array = false;
-	tex->coord_components = 2;
+	tex->coord_components = is_3d ? 3 : 2;
 	tex->texture = nir_deref_var_create(tex, input_img);
 	tex->sampler = NULL;

@@ -504,9 +568,10 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
 {
 	VkResult result;
 	struct radv_shader_module cs = { .nir = NULL };
-
-	cs.nir = build_nir_itoi_compute_shader(device);
-
+	struct radv_shader_module cs_3d = { .nir = NULL };
+	cs.nir = build_nir_itoi_compute_shader(device, false);
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		cs_3d.nir = build_nir_itoi_compute_shader(device, true);
 	/*
 	 * two descriptors one for the image being sampled
 	 * one for the buffer being written.
@@ -546,7 +611,7 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
 		.setLayoutCount = 1,
 		.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
 		.pushConstantRangeCount = 1,
-		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
 	};

 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -580,10 +645,39 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

+	/* GFX9 and newer also get a pipeline built from the 3D shader variant. */
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+			.module = radv_shader_module_to_handle(&cs_3d),
+			.pName = "main",
+			.pSpecializationInfo = NULL,
+		};
+
+		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+			.stage = pipeline_shader_stage_3d,
+			.flags = 0,
+			.layout = device->meta_state.itoi.img_p_layout,
+		};
+
+		result = radv_CreateComputePipelines(radv_device_to_handle(device),
+						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+						     1, &vk_pipeline_info_3d, NULL,
+						     &device->meta_state.itoi.pipeline_3d);
+		/* Do not report success without a 3D pipeline; the fail path frees both shaders. */
+		if (result != VK_SUCCESS)
+			goto fail;
+
+		ralloc_free(cs_3d.nir);
+	}
 	ralloc_free(cs.nir);
+
 	return VK_SUCCESS;
 fail:
 	ralloc_free(cs.nir);
+	ralloc_free(cs_3d.nir);
 	return result;
 }

@@ -599,18 +689,22 @@ radv_device_finish_meta_itoi_state(struct radv_device *device)
 					&state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->itoi.pipeline, &state->alloc);
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->itoi.pipeline_3d, &state->alloc);
 }

 static nir_shader *
-build_nir_cleari_compute_shader(struct radv_device *dev)
+build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
 {
 	nir_builder b;
-	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
+	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+	const struct glsl_type *img_type = glsl_sampler_type(dim,
 							     false,
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
+	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
 	b.shader->info.cs.local_size[0] = 16;
 	b.shader->info.cs.local_size[1] = 16;
 	b.shader->info.cs.local_size[2] = 1;
@@ -631,12 +725,29 @@ build_nir_cleari_compute_shader(struct radv_device *dev)

 	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
 	nir_intrinsic_set_base(clear_val, 0);
-	nir_intrinsic_set_range(clear_val, 16);
+	nir_intrinsic_set_range(clear_val, 20);
 	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	clear_val->num_components = 4;
 	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
 	nir_builder_instr_insert(&b, &clear_val->instr);

+	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(layer, 0);
+	nir_intrinsic_set_range(layer, 20);
+	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
+	layer->num_components = 1;
+	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
+	nir_builder_instr_insert(&b, &layer->instr);
+
+	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
+
+	nir_ssa_def *comps[4];
+	comps[0] = nir_channel(&b, global_id, 0);
+	comps[1] = nir_channel(&b, global_id, 1);
+	comps[2] = global_z;
+	comps[3] = nir_imm_int(&b, 0);
+	global_id = nir_vec(&b, comps, 4);
+
 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
 	store->src[0] = nir_src_for_ssa(global_id);
 	store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
@@ -652,8 +763,10 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
 {
 	VkResult result;
 	struct radv_shader_module cs = { .nir = NULL };
-
-	cs.nir = build_nir_cleari_compute_shader(device);
+	struct radv_shader_module cs_3d = { .nir = NULL };
+	cs.nir = build_nir_cleari_compute_shader(device, false);
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		cs_3d.nir = build_nir_cleari_compute_shader(device, true);

 	/*
 	 * two descriptors one for the image being sampled
@@ -687,7 +800,7 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
 		.setLayoutCount = 1,
 		.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
 		.pushConstantRangeCount = 1,
-		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
 	};

 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
@@ -721,10 +834,38 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

+
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		/* compute shader */
+		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+			.module = radv_shader_module_to_handle(&cs_3d),
+			.pName = "main",
+			.pSpecializationInfo = NULL,
+		};
+
+		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+			.stage = pipeline_shader_stage_3d,
+			.flags = 0,
+			.layout = device->meta_state.cleari.img_p_layout,
+		};
+
+		result = radv_CreateComputePipelines(radv_device_to_handle(device),
+						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+						     1, &vk_pipeline_info_3d, NULL,
+						     &device->meta_state.cleari.pipeline_3d);
+		if (result != VK_SUCCESS)
+			goto fail;
+
+		ralloc_free(cs_3d.nir);
+	}
 	ralloc_free(cs.nir);
 	return VK_SUCCESS;
 fail:
 	ralloc_free(cs.nir);
+	ralloc_free(cs_3d.nir);
 	return result;
 }

@@ -740,6 +881,8 @@ radv_device_finish_meta_cleari_state(struct radv_device *device)
 					&state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->cleari.pipeline, &state->alloc);
+	radv_DestroyPipeline(radv_device_to_handle(device),
+			     state->cleari.pipeline_3d, &state->alloc);
 }

 void
@@ -758,21 +901,23 @@ radv_device_init_meta_bufimage_state(struct radv_device *device)

 	result = radv_device_init_meta_itob_state(device);
 	if (result != VK_SUCCESS)
-		return result;
+		goto fail_itob;

 	result = radv_device_init_meta_btoi_state(device);
 	if (result != VK_SUCCESS)
-		goto fail_itob;
+		goto fail_btoi;

 	result = radv_device_init_meta_itoi_state(device);
 	if (result != VK_SUCCESS)
-		goto fail_btoi;
+		goto fail_itoi;

 	result = radv_device_init_meta_cleari_state(device);
 	if (result != VK_SUCCESS)
-		goto fail_itoi;
+		goto fail_cleari;

 	return VK_SUCCESS;
+fail_cleari:
+	radv_device_finish_meta_cleari_state(device);
 fail_itoi:
 	radv_device_finish_meta_itoi_state(device);
 fail_btoi:
@@ -787,12 +932,13 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
             struct radv_meta_blit2d_surf *surf,
             struct radv_image_view *iview)
 {
-
+	VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
+		radv_meta_get_view_type(surf->image);
 	radv_image_view_init(iview, cmd_buffer->device,
 			     &(VkImageViewCreateInfo) {
 				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 					     .image = radv_image_to_handle(surf->image),
-					     .viewType = VK_IMAGE_VIEW_TYPE_2D,
+					     .viewType = view_type,
 					     .format = surf->format,
 					     .subresourceRange = {
 					     .aspectMask = surf->aspect_mask,
@@ -877,19 +1023,23 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 	create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
 	itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

+	if (device->physical_device->rad_info.chip_class >= GFX9 &&
+	    src->image->type == VK_IMAGE_TYPE_3D)
+		pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;

 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

 	for (unsigned r = 0; r < num_rects; ++r) {
-		unsigned push_constants[3] = {
+		unsigned push_constants[4] = {
 			rects[r].src_x,
 			rects[r].src_y,
+			src->layer,
 			dst->pitch
 		};
 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 				      device->meta_state.itob.img_p_layout,
-				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
+				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 				      push_constants);

 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
@@ -950,18 +1100,22 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	create_iview(cmd_buffer, dst, &dst_view);
 	btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

+	if (device->physical_device->rad_info.chip_class >= GFX9 &&
+	    dst->image->type == VK_IMAGE_TYPE_3D)
+		pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

 	for (unsigned r = 0; r < num_rects; ++r) {
-		unsigned push_constants[3] = {
+		unsigned push_constants[4] = {
 			rects[r].dst_x,
 			rects[r].dst_y,
-			src->pitch
+			dst->layer,
+			src->pitch,
 		};
 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 				      device->meta_state.btoi.img_p_layout,
-				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
+				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 				      push_constants);

 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
@@ -1028,19 +1182,24 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,

 	itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

+	if (device->physical_device->rad_info.chip_class >= GFX9 &&
+	    src->image->type == VK_IMAGE_TYPE_3D)
+		pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

 	for (unsigned r = 0; r < num_rects; ++r) {
-		unsigned push_constants[4] = {
+		unsigned push_constants[6] = {
 			rects[r].src_x,
 			rects[r].src_y,
+			src->layer,
 			rects[r].dst_x,
 			rects[r].dst_y,
+			dst->layer,
 		};
 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 				      device->meta_state.itoi.img_p_layout,
-				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
 				      push_constants);

 		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
@@ -1088,19 +1247,24 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	create_iview(cmd_buffer, dst, &dst_iview);
 	cleari_bind_descriptors(cmd_buffer, &dst_iview);

+	if (device->physical_device->rad_info.chip_class >= GFX9 &&
+	    dst->image->type == VK_IMAGE_TYPE_3D)
+		pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
+
 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
 			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

-	unsigned push_constants[4] = {
+	unsigned push_constants[5] = {
 		clear_color->uint32[0],
 		clear_color->uint32[1],
 		clear_color->uint32[2],
 		clear_color->uint32[3],
+		dst->layer,
 	};

 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 			      device->meta_state.cleari.img_p_layout,
-			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
+			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
 			      push_constants);

 	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -628,6 +628,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 			      VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
 			      &clear_value.depth);

+	uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
 		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
 						  clear_value.stencil);
@@ -662,6 +663,11 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 	radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);

 	radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+
+	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
+						  prev_reference);
+	}
 }

 static bool
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -37,10 +37,11 @@ meta_image_block_size(const struct radv_image *image)
 */
 static struct VkExtent3D
 meta_region_extent_el(const struct radv_image *image,
+                      const VkImageType imageType,
                      const struct VkExtent3D *extent)
 {
 	const VkExtent3D block = meta_image_block_size(image);
-	return radv_sanitize_image_extent(image->type, (VkExtent3D) {
+	return radv_sanitize_image_extent(imageType, (VkExtent3D) {
 			.width  = DIV_ROUND_UP(extent->width , block.width),
 				.height = DIV_ROUND_UP(extent->height, block.height),
 				.depth  = DIV_ROUND_UP(extent->depth , block.depth),
@@ -79,6 +80,7 @@ vk_format_for_size(int bs)

 static struct radv_meta_blit2d_surf
 blit_surf_for_image_level_layer(struct radv_image *image,
+				VkImageLayout layout,
 				const VkImageSubresourceLayers *subres)
 {
 	VkFormat format = image->vk_format;
@@ -87,7 +89,8 @@ blit_surf_for_image_level_layer(struct radv_image *image,
 	else if (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
 		format = vk_format_stencil_only(format);

-	if (!image->surface.dcc_size)
+	if (!image->surface.dcc_size &&
+	    !(image->surface.htile_size && image->tc_compatible_htile))
 		format = vk_format_for_size(vk_format_get_blocksize(format));

 	return (struct radv_meta_blit2d_surf) {
@@ -97,6 +100,7 @@ blit_surf_for_image_level_layer(struct radv_image *image,
 		.layer = subres->baseArrayLayer,
 		.image = image,
 		.aspect_mask = subres->aspectMask,
+		.current_layout = layout,
 	};
 }

@@ -104,6 +108,7 @@ static void
 meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_buffer* buffer,
                          struct radv_image* image,
+			  VkImageLayout layout,
                          uint32_t regionCount,
                          const VkBufferImageCopy* pRegions)
 {
@@ -142,11 +147,11 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 			pRegions[r].bufferImageHeight : pRegions[r].imageExtent.height,
 		};
 		const VkExtent3D buf_extent_el =
-			meta_region_extent_el(image, &bufferExtent);
+			meta_region_extent_el(image, image->type, &bufferExtent);

 		/* Start creating blit rect */
 		const VkExtent3D img_extent_el =
-			meta_region_extent_el(image, &pRegions[r].imageExtent);
+			meta_region_extent_el(image, image->type, &pRegions[r].imageExtent);
 		struct radv_meta_blit2d_rect rect = {
 			.width = img_extent_el.width,
 			.height =  img_extent_el.height,
@@ -155,6 +160,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf img_bsurf =
 			blit_surf_for_image_level_layer(image,
+							layout,
 							&pRegions[r].imageSubresource);

 		struct radv_meta_blit2d_buffer buf_bsurf = {
@@ -214,7 +220,7 @@ void radv_CmdCopyBufferToImage(
 	RADV_FROM_HANDLE(radv_image, dest_image, destImage);
 	RADV_FROM_HANDLE(radv_buffer, src_buffer, srcBuffer);

-	meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image,
+	meta_copy_buffer_to_image(cmd_buffer, src_buffer, dest_image, destImageLayout,
 				  regionCount, pRegions);
 }

@@ -222,6 +228,7 @@ static void
 meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_buffer* buffer,
                          struct radv_image* image,
+			  VkImageLayout layout,
                          uint32_t regionCount,
                          const VkBufferImageCopy* pRegions)
 {
@@ -253,11 +260,11 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 			pRegions[r].bufferImageHeight : pRegions[r].imageExtent.height,
 		};
 		const VkExtent3D buf_extent_el =
-			meta_region_extent_el(image, &bufferExtent);
+			meta_region_extent_el(image, image->type, &bufferExtent);

 		/* Start creating blit rect */
 		const VkExtent3D img_extent_el =
-			meta_region_extent_el(image, &pRegions[r].imageExtent);
+			meta_region_extent_el(image, image->type, &pRegions[r].imageExtent);
 		struct radv_meta_blit2d_rect rect = {
 			.width = img_extent_el.width,
 			.height =  img_extent_el.height,
@@ -266,6 +273,7 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf img_info =
 			blit_surf_for_image_level_layer(image,
+							layout,
 							&pRegions[r].imageSubresource);

 		struct radv_meta_blit2d_buffer buf_info = {
@@ -318,13 +326,16 @@ void radv_CmdCopyImageToBuffer(
 	RADV_FROM_HANDLE(radv_buffer, dst_buffer, destBuffer);

 	meta_copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
+				  srcImageLayout,
 				  regionCount, pRegions);
 }

 static void
 meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
 		struct radv_image *src_image,
+		VkImageLayout src_image_layout,
 		struct radv_image *dest_image,
+		VkImageLayout dest_image_layout,
 		uint32_t regionCount,
 		const VkImageCopy *pRegions)
 {
@@ -351,10 +362,12 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf b_src =
 			blit_surf_for_image_level_layer(src_image,
+							src_image_layout,
 							&pRegions[r].srcSubresource);

 		struct radv_meta_blit2d_surf b_dst =
 			blit_surf_for_image_level_layer(dest_image,
+							dest_image_layout,
 							&pRegions[r].dstSubresource);

 		/* for DCC */
@@ -373,8 +386,18 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
 			meta_region_offset_el(dest_image, &pRegions[r].dstOffset);
 		const VkOffset3D src_offset_el =
 			meta_region_offset_el(src_image, &pRegions[r].srcOffset);
+
+		/*
+		 * From Vulkan 1.0.68, "Copying Data Between Images":
+		 *    "When copying between compressed and uncompressed formats
+		 *     the extent members represent the texel dimensions of the
+		 *     source image and not the destination."
+		 * However, we must use the destination image type to avoid
+		 * clamping depth when copying multiple layers of a 2D image to
+		 * a 3D image.
+		 */
 		const VkExtent3D img_extent_el =
-			meta_region_extent_el(dest_image, &pRegions[r].extent);
+			meta_region_extent_el(src_image, dest_image->type, &pRegions[r].extent);

 		/* Start creating blit rect */
 		struct radv_meta_blit2d_rect rect = {
@@ -382,6 +405,9 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
 			.height = img_extent_el.height,
 		};

+		if (src_image->type == VK_IMAGE_TYPE_3D)
+			b_src.layer = src_offset_el.z;
+
 		if (dest_image->type == VK_IMAGE_TYPE_3D)
 			b_dst.layer = dst_offset_el.z;

@@ -429,7 +455,9 @@ void radv_CmdCopyImage(
 	RADV_FROM_HANDLE(radv_image, src_image, srcImage);
 	RADV_FROM_HANDLE(radv_image, dest_image, destImage);

-	meta_copy_image(cmd_buffer, src_image, dest_image,
+	meta_copy_image(cmd_buffer,
+			src_image, srcImageLayout,
+			dest_image, destImageLayout,
 			regionCount, pRegions);
 }

@@ -449,6 +477,7 @@ void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
 	image_copy.extent.height = image->info.height;
 	image_copy.extent.depth = 1;

-	meta_copy_image(cmd_buffer, image, linear_image,
+	meta_copy_image(cmd_buffer, image, VK_IMAGE_LAYOUT_GENERAL, linear_image,
+			VK_IMAGE_LAYOUT_GENERAL,
 			1, &image_copy);
 }
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -75,11 +75,29 @@ create_pass(struct radv_device *device,
 	return result;
 }

+static VkResult /* returns the VkResult of radv_CreatePipelineLayout */
+create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
+{
+	VkPipelineLayoutCreateInfo pl_create_info = { /* describes an empty layout */
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 0, /* no descriptor set layouts */
+		.pSetLayouts = NULL,
+		.pushConstantRangeCount = 0, /* no push constant ranges */
+		.pPushConstantRanges = NULL,
+	};
+
+	return radv_CreatePipelineLayout(radv_device_to_handle(device),
+					 &pl_create_info,
+					 &device->meta_state.alloc, /* meta-state allocation callbacks */
+					 layout); /* caller-provided output handle */
+}
+
 static VkResult
 create_pipeline(struct radv_device *device,
                VkShaderModule vs_module_h,
 		uint32_t samples,
 		VkRenderPass pass,
+		VkPipelineLayout layout,
 		VkPipeline *decompress_pipeline,
 		VkPipeline *resummarize_pipeline)
 {
@@ -165,6 +183,7 @@ create_pipeline(struct radv_device *device,
 				VK_DYNAMIC_STATE_SCISSOR,
 			},
 		},
+		.layout = layout,
 		.renderPass = pass,
 		.subpass = 0,
 	};
@@ -212,6 +231,9 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
 		radv_DestroyRenderPass(radv_device_to_handle(device),
 				       state->depth_decomp[i].pass,
 				       &state->alloc);
+		radv_DestroyPipelineLayout(radv_device_to_handle(device),
+					   state->depth_decomp[i].p_layout,
+					   &state->alloc);
 		radv_DestroyPipeline(radv_device_to_handle(device),
 				     state->depth_decomp[i].decompress_pipeline,
 				     &state->alloc);
@@ -243,8 +265,14 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
 		if (res != VK_SUCCESS)
 			goto fail;

+		res = create_pipeline_layout(device,
+					     &state->depth_decomp[i].p_layout);
+		if (res != VK_SUCCESS)
+			goto fail;
+
 		res = create_pipeline(device, vs_module_h, samples,
 				      state->depth_decomp[i].pass,
+				      state->depth_decomp[i].p_layout,
 				      &state->depth_decomp[i].decompress_pipeline,
 				      &state->depth_decomp[i].resummarize_pipeline);
 		if (res != VK_SUCCESS)
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -74,9 +74,27 @@ create_pass(struct radv_device *device)
 	return result;
 }

+static VkResult /* returns the VkResult of radv_CreatePipelineLayout */
+create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
+{
+	VkPipelineLayoutCreateInfo pl_create_info = { /* describes an empty layout */
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 0, /* no descriptor set layouts */
+		.pSetLayouts = NULL,
+		.pushConstantRangeCount = 0, /* no push constant ranges */
+		.pPushConstantRanges = NULL,
+	};
+
+	return radv_CreatePipelineLayout(radv_device_to_handle(device),
+					 &pl_create_info,
+					 &device->meta_state.alloc, /* meta-state allocation callbacks */
+					 layout); /* caller-provided output handle */
+}
+
 static VkResult
 create_pipeline(struct radv_device *device,
-                VkShaderModule vs_module_h)
+		VkShaderModule vs_module_h,
+		VkPipelineLayout layout)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -173,6 +191,7 @@ create_pipeline(struct radv_device *device,
 								VK_DYNAMIC_STATE_SCISSOR,
 							},
 						},
+					        .layout = layout,
 						.renderPass = device->meta_state.fast_clear_flush.pass,
 						.subpass = 0,
 					       },
@@ -218,6 +237,7 @@ create_pipeline(struct radv_device *device,
 								VK_DYNAMIC_STATE_SCISSOR,
 							},
 						},
+						.layout = layout,
 						.renderPass = device->meta_state.fast_clear_flush.pass,
 						.subpass = 0,
 					       },
@@ -245,6 +265,9 @@ radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)

 	radv_DestroyRenderPass(radv_device_to_handle(device),
 			       state->fast_clear_flush.pass, &state->alloc);
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->fast_clear_flush.p_layout,
+				   &state->alloc);
 	radv_DestroyPipeline(radv_device_to_handle(device),
 			     state->fast_clear_flush.cmask_eliminate_pipeline,
 			     &state->alloc);
@@ -269,8 +292,14 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)
 	if (res != VK_SUCCESS)
 		goto fail;

+	res = create_pipeline_layout(device,
+				     &device->meta_state.fast_clear_flush.p_layout);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
-	res = create_pipeline(device, vs_module_h);
+	res = create_pipeline(device, vs_module_h,
+			      device->meta_state.fast_clear_flush.p_layout);
 	if (res != VK_SUCCESS)
 		goto fail;

--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -26,6 +26,7 @@

 #include "radv_meta.h"
 #include "radv_private.h"
+#include "vk_format.h"
 #include "nir/nir_builder.h"
 #include "sid.h"

@@ -50,7 +51,7 @@ build_nir_fs(void)
 }

 static VkResult
-create_pass(struct radv_device *device)
+create_pass(struct radv_device *device, VkFormat vk_format, VkRenderPass *pass)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -59,7 +60,7 @@ create_pass(struct radv_device *device)
 	int i;

 	for (i = 0; i < 2; i++) {
-		attachments[i].format = VK_FORMAT_UNDEFINED;
+		attachments[i].format = vk_format;
 		attachments[i].samples = 1;
 		attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
 		attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
@@ -99,14 +100,16 @@ create_pass(struct radv_device *device)
 								.dependencyCount = 0,
 									 },
 				       alloc,
-				       &device->meta_state.resolve.pass);
+				       pass);

 	return result;
 }

 static VkResult
 create_pipeline(struct radv_device *device,
-                VkShaderModule vs_module_h)
+		 VkShaderModule vs_module_h,
+		 VkPipeline *pipeline,
+		 VkRenderPass pass)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -121,6 +124,23 @@ create_pipeline(struct radv_device *device,
 		goto cleanup;
 	}

+	VkPipelineLayoutCreateInfo pl_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 0,
+		.pSetLayouts = NULL,
+		.pushConstantRangeCount = 0,
+		.pPushConstantRanges = NULL,
+	};
+
+	if (!device->meta_state.resolve.p_layout) {
+		result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+						   &pl_create_info,
+						   &device->meta_state.alloc,
+						   &device->meta_state.resolve.p_layout);
+		if (result != VK_SUCCESS)
+			goto cleanup;
+	}
+
 	result = radv_graphics_pipeline_create(device_h,
 					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
 					       &(VkGraphicsPipelineCreateInfo) {
@@ -196,15 +216,15 @@ create_pipeline(struct radv_device *device,
 								VK_DYNAMIC_STATE_SCISSOR,
 							},
 						},
-																       .renderPass = device->meta_state.resolve.pass,
+						.layout = device->meta_state.resolve.p_layout,
+						.renderPass = pass,
 																       .subpass = 0,
 																       },
 					       &(struct radv_graphics_pipeline_create_info) {
 						       .use_rectlist = true,
 						       .custom_blend_mode = V_028808_CB_RESOLVE,
 							       },
-					       &device->meta_state.alloc,
-					       &device->meta_state.resolve.pipeline);
+					       &device->meta_state.alloc, pipeline);
 	if (result != VK_SUCCESS)
 		goto cleanup;

@@ -220,17 +240,37 @@ radv_device_finish_meta_resolve_state(struct radv_device *device)
 {
 	struct radv_meta_state *state = &device->meta_state;

-	radv_DestroyRenderPass(radv_device_to_handle(device),
-			       state->resolve.pass, &state->alloc);
-	radv_DestroyPipeline(radv_device_to_handle(device),
-			     state->resolve.pipeline, &state->alloc);
+	for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
+		radv_DestroyRenderPass(radv_device_to_handle(device),
+				       state->resolve.pass[j], &state->alloc);
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve.pipeline[j], &state->alloc);
+	}
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->resolve.p_layout, &state->alloc);
+
 }

+static VkFormat pipeline_formats[] = { /* iterated by radv_device_init_meta_resolve_state: one pass and pipeline per entry, stored by fs key */
+	VK_FORMAT_R8G8B8A8_UNORM,
+	VK_FORMAT_R8G8B8A8_UINT,
+	VK_FORMAT_R8G8B8A8_SINT,
+	VK_FORMAT_A2R10G10B10_UINT_PACK32,
+	VK_FORMAT_A2R10G10B10_SINT_PACK32,
+	VK_FORMAT_R16G16B16A16_UNORM,
+	VK_FORMAT_R16G16B16A16_SNORM,
+	VK_FORMAT_R16G16B16A16_UINT,
+	VK_FORMAT_R16G16B16A16_SINT,
+	VK_FORMAT_R32_SFLOAT,
+	VK_FORMAT_R32G32_SFLOAT,
+	VK_FORMAT_R32G32B32A32_SFLOAT
+};
+
 VkResult
 radv_device_init_meta_resolve_state(struct radv_device *device)
 {
 	VkResult res = VK_SUCCESS;
-
+	struct radv_meta_state *state = &device->meta_state;
 	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
 	if (!vs_module.nir) {
 		/* XXX: Need more accurate error */
@@ -238,14 +278,19 @@ radv_device_init_meta_resolve_state(struct radv_device *device)
 		goto fail;
 	}

-	res = create_pass(device);
-	if (res != VK_SUCCESS)
-		goto fail;
+	for (uint32_t i = 0; i < ARRAY_SIZE(pipeline_formats); ++i) {
+		VkFormat format = pipeline_formats[i];
+		unsigned fs_key = radv_format_meta_fs_key(format);
+		res = create_pass(device, format, &state->resolve.pass[fs_key]);
+		if (res != VK_SUCCESS)
+			goto fail;

-	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
-	res = create_pipeline(device, vs_module_h);
-	if (res != VK_SUCCESS)
-		goto fail;
+		VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
+		res = create_pipeline(device, vs_module_h,
+				      &state->resolve.pipeline[fs_key], state->resolve.pass[fs_key]);
+		if (res != VK_SUCCESS)
+			goto fail;
+	}

 	goto cleanup;

@@ -260,16 +305,18 @@ cleanup:

 static void
 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
+	     VkFormat vk_format,
             const VkOffset2D *dest_offset,
             const VkExtent2D *resolve_extent)
 {
 	struct radv_device *device = cmd_buffer->device;
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+	unsigned fs_key = radv_format_meta_fs_key(vk_format);

 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;

 	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
-			     device->meta_state.resolve.pipeline);
+			     device->meta_state.resolve.pipeline[fs_key]);

 	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
 		.x = dest_offset->x,
@@ -300,11 +347,16 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
 					    enum radv_resolve_method *method)

 {
-	if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
-		if (dest_image->surface.num_dcc_levels > 0)
-			*method = RESOLVE_FRAGMENT;
-		else
-			*method = RESOLVE_COMPUTE;
+	if (src_image->vk_format == VK_FORMAT_R16G16_UNORM ||
+	    src_image->vk_format == VK_FORMAT_R16G16_SNORM)
+		*method = RESOLVE_COMPUTE;
+	else if (vk_format_is_int(src_image->vk_format))
+		*method = RESOLVE_COMPUTE;
+
+	if (dest_image->surface.num_dcc_levels > 0) {
+		*method = RESOLVE_FRAGMENT;
+	} else if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
+		*method = RESOLVE_COMPUTE;
 	}
 }

@@ -390,6 +442,7 @@ void radv_CmdResolveImage(
 	if (dest_image->surface.dcc_size) {
 		radv_initialize_dcc(cmd_buffer, dest_image, 0xffffffff);
 	}
+	unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
 	for (uint32_t r = 0; r < region_count; ++r) {
 		const VkImageResolve *region = &regions[r];

@@ -489,7 +542,7 @@ void radv_CmdResolveImage(
 			radv_CmdBeginRenderPass(cmd_buffer_h,
 						      &(VkRenderPassBeginInfo) {
 							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.resolve.pass,
+								      .renderPass = device->meta_state.resolve.pass[fs_key],
 								      .framebuffer = fb_h,
 								      .renderArea = {
 								      .offset = {
@@ -507,6 +560,7 @@ void radv_CmdResolveImage(
 						      VK_SUBPASS_CONTENTS_INLINE);

 			emit_resolve(cmd_buffer,
+				     dest_iview.vk_format,
 				     &(VkOffset2D) {
 					     .x = dstOffset.x,
 					     .y = dstOffset.y,
@@ -560,7 +614,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
 		struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;

-		radv_pick_resolve_method_images(dst_img, src_img, &resolve_method);
+		radv_pick_resolve_method_images(src_img, dst_img, &resolve_method);
 		if (resolve_method == RESOLVE_FRAGMENT) {
 			break;
 		}
@@ -601,6 +655,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);

 		emit_resolve(cmd_buffer,
+			     dst_img->vk_format,
 			     &(VkOffset2D) { 0, 0 },
 			     &(VkExtent2D) { fb->width, fb->height });
 	}
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -253,22 +253,31 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device)

 	res = create_layout(device);
 	if (res != VK_SUCCESS)
-		return res;
+		goto fail;

 	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
 		uint32_t samples = 1 << i;

 		res = create_resolve_pipeline(device, samples, false, false,
 					      &state->resolve_compute.rc[i].pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;

 		res = create_resolve_pipeline(device, samples, true, false,
 					      &state->resolve_compute.rc[i].i_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;

 		res = create_resolve_pipeline(device, samples, false, true,
 					      &state->resolve_compute.rc[i].srgb_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;

 	}

+	return VK_SUCCESS;
+fail:
+	radv_device_finish_meta_resolve_compute_state(device);
 	return res;
 }

@@ -487,6 +496,14 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
 	if (!subpass->has_resolve)
 		return;

+	/* Resolves happen before the end-of-subpass barriers get executed,
+	 * so we have to make the attachment shader-readable */
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+	                                RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+	                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+	                                RADV_CMD_FLAG_INV_GLOBAL_L2 |
+	                                RADV_CMD_FLAG_INV_VMEM_L1;
+
 	for (uint32_t i = 0; i < subpass->color_count; ++i) {
 		VkAttachmentReference src_att = subpass->color_attachments[i];
 		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -316,16 +316,9 @@ create_resolve_pipeline(struct radv_device *device,
 					       &vk_pipeline_info, &radv_pipeline_info,
 					       &device->meta_state.alloc,
 					       pipeline);
-
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);
-	if (result != VK_SUCCESS)
-		goto fail;

-	return VK_SUCCESS;
-fail:
-	ralloc_free(vs.nir);
-	ralloc_free(fs.nir);
 	return result;
 }

@@ -336,14 +329,19 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device)

 	res = create_layout(device);
 	if (res != VK_SUCCESS)
-		return res;
+		goto fail;

 	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
 		for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
 			res = create_resolve_pipeline(device, i, pipeline_formats[j]);
+			if (res != VK_SUCCESS)
+				goto fail;
 		}
 	}

+	return VK_SUCCESS;
+fail:
+	radv_device_finish_meta_resolve_fragment_state(device);
 	return res;
 }

@@ -407,8 +405,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;

 	unsigned push_constants[2] = {
-		src_offset->x,
-		src_offset->y,
+		src_offset->x - dest_offset->x,
+		src_offset->y - dest_offset->y,
 	};
 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 			      device->meta_state.resolve_fragment.p_layout,
@@ -540,8 +538,8 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
 					       .pAttachments = (VkImageView[]) {
 					       radv_image_view_to_handle(&dest_iview),
 				       },
-				       .width = extent.width,
-				       .height = extent.height,
+				       .width = extent.width + dstOffset.x,
+				       .height = extent.height + dstOffset.y,
 				       .layers = 1
 				}, &cmd_buffer->pool->alloc, &fb);

@@ -604,6 +602,16 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
 		       RADV_META_SAVE_CONSTANTS |
 		       RADV_META_SAVE_DESCRIPTORS);

+	/* Resolves happen before the end-of-subpass barriers get executed,
+	 * so we have to make the attachment shader-readable */
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+	                                RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+	                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+	                                RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+	                                RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
+	                                RADV_CMD_FLAG_INV_GLOBAL_L2 |
+	                                RADV_CMD_FLAG_INV_VMEM_L1;
+
 	for (uint32_t i = 0; i < subpass->color_count; ++i) {
 		VkAttachmentReference src_att = subpass->color_attachments[i];
 		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -879,6 +879,8 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 			S_028BE0_MAX_SAMPLE_DIST(radv_cayman_get_maxdist(log_samples)) |
 			S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); /* CM_R_028BE0_PA_SC_AA_CONFIG */
 		ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
+		if (ps_iter_samples > 1)
+			pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
 	}

 	const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
@@ -1175,7 +1177,7 @@ static void calculate_gfx9_gs_info(const VkGraphicsPipelineCreateInfo *pCreateIn
 	case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
 	case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
-		uses_adjacency = false;
+		uses_adjacency = true;
 		break;
 	default:
 		uses_adjacency = false;
@@ -1697,14 +1699,60 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
 							   ordered_shaders[i - 1]);

 		if (progress) {
-			nir_lower_global_vars_to_local(ordered_shaders[i]);
+			if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
+				radv_lower_indirect_derefs(ordered_shaders[i],
+				                           pipeline->device->physical_device);
+			}
 			radv_optimize_nir(ordered_shaders[i]);
-			nir_lower_global_vars_to_local(ordered_shaders[i - 1]);
+
+			if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
+				radv_lower_indirect_derefs(ordered_shaders[i - 1],
+				                           pipeline->device->physical_device);
+			}
 			radv_optimize_nir(ordered_shaders[i - 1]);
 		}
 	}
 }

+static void
+merge_tess_info(struct shader_info *tes_info,
+                const struct shader_info *tcs_info)
+{
+	/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
+	 *
+	 *    "PointMode. Controls generation of points rather than triangles
+	 *     or lines. This functionality defaults to disabled, and is
+	 *     enabled if either shader stage includes the execution mode."
+	 *
+	 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
+	 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
+	 * and OutputVertices, it says:
+	 *
+	 *    "One mode must be set in at least one of the tessellation
+	 *     shader stages."
+	 *
+	 * So, the fields can be set in either the TCS or TES, but they must
+	 * agree if set in both.  Our backend looks at TES, so bitwise-or in
+	 * the values from the TCS.
+	 */
+	assert(tcs_info->tess.tcs_vertices_out == 0 ||
+	       tes_info->tess.tcs_vertices_out == 0 ||
+	       tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
+	tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
+
+	assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+	       tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+	       tcs_info->tess.spacing == tes_info->tess.spacing);
+	tes_info->tess.spacing |= tcs_info->tess.spacing;
+
+	assert(tcs_info->tess.primitive_mode == 0 ||
+	       tes_info->tess.primitive_mode == 0 ||
+	       tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
+	tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
+	tes_info->tess.ccw |= tcs_info->tess.ccw;
+	tes_info->tess.point_mode |= tcs_info->tess.point_mode;
+}
+
 static
 void radv_create_shaders(struct radv_pipeline *pipeline,
                         struct radv_device *device,
@@ -1766,6 +1814,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 						    stage ? stage->pName : "main", i,
 						    stage ? stage->pSpecializationInfo : NULL);
 		pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+		/* We don't want to alter the meta shaders' IR directly, so clone
+		 * it first.
+		 */
+		if (nir[i]->info.name) {
+			nir[i] = nir_shader_clone(NULL, nir[i]);
+		}
+
 	}

 	if (nir[MESA_SHADER_TESS_CTRL]) {
@@ -1775,10 +1830,19 @@ void radv_create_shaders(struct radv_pipeline *pipeline,

 		keys[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors = !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read & (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
 		nir_lower_tes_patch_vertices(nir[MESA_SHADER_TESS_EVAL], nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out);
+		merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
 	}

 	radv_link_shaders(pipeline, nir);

+	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+		if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
+			continue;
+
+		if (modules[i])
+			nir_print_shader(nir[i], stderr);
+	}
+
 	if (nir[MESA_SHADER_FRAGMENT]) {
 		if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
 			pipeline->shaders[MESA_SHADER_FRAGMENT] =
@@ -1863,7 +1927,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,

 	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
 		free(codes[i]);
-		if (modules[i] && !modules[i]->nir && !pipeline->device->trace_bo)
+		if (modules[i] && !pipeline->device->trace_bo)
 			ralloc_free(nir[i]);
 	}

@@ -1940,6 +2004,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,

 	radv_create_shaders(pipeline, device, cache, keys, pStages);

+	pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
 	radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra);
 	radv_pipeline_init_raster_state(pipeline, pCreateInfo);
 	radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -170,81 +170,6 @@ radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
 	return entry;
 }

-bool
-radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
-					        struct radv_pipeline_cache *cache,
-					        const unsigned char *sha1,
-					        struct radv_shader_variant **variants)
-{
-	struct cache_entry *entry;
-
-	if (!cache)
-		cache = device->mem_cache;
-
-	pthread_mutex_lock(&cache->mutex);
-
-	entry = radv_pipeline_cache_search_unlocked(cache, sha1);
-
-	if (!entry) {
-		if (!device->physical_device->disk_cache ||
-		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
-			pthread_mutex_unlock(&cache->mutex);
-			return false;
-		}
-
-		uint8_t disk_sha1[20];
-		disk_cache_compute_key(device->physical_device->disk_cache,
-				       sha1, 20, disk_sha1);
-		entry = (struct cache_entry *)
-			disk_cache_get(device->physical_device->disk_cache,
-				       disk_sha1, NULL);
-		if (!entry) {
-			pthread_mutex_unlock(&cache->mutex);
-			return false;
-		}
-	}
-
-	char *p = entry->code;
-	for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
-		if (!entry->variants[i] && entry->code_sizes[i]) {
-			struct radv_shader_variant *variant;
-			struct cache_entry_variant_info info;
-
-			variant = calloc(1, sizeof(struct radv_shader_variant));
-			if (!variant) {
-				pthread_mutex_unlock(&cache->mutex);
-				return false;
-			}
-
-			memcpy(&info, p, sizeof(struct cache_entry_variant_info));
-			p += sizeof(struct cache_entry_variant_info);
-
-			variant->config = info.config;
-			variant->info = info.variant_info;
-			variant->rsrc1 = info.rsrc1;
-			variant->rsrc2 = info.rsrc2;
-			variant->code_size = entry->code_sizes[i];
-			variant->ref_count = 1;
-
-			void *ptr = radv_alloc_shader_memory(device, variant);
-			memcpy(ptr, p, entry->code_sizes[i]);
-			p += entry->code_sizes[i];
-
-			entry->variants[i] = variant;
-		}
-
-	}
-
-	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
-		if (entry->variants[i])
-			p_atomic_inc(&entry->variants[i]->ref_count);
-
-	memcpy(variants, entry->variants, sizeof(entry->variants));
-	pthread_mutex_unlock(&cache->mutex);
-	return true;
-}
-
-
 static void
 radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
 			      struct cache_entry *entry)
@@ -314,6 +239,97 @@ radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
 		radv_pipeline_cache_set_entry(cache, entry);
 }

+bool
+radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
+					        struct radv_pipeline_cache *cache,
+					        const unsigned char *sha1,
+					        struct radv_shader_variant **variants)
+{
+	struct cache_entry *entry;
+
+	if (!cache)
+		cache = device->mem_cache;
+
+	pthread_mutex_lock(&cache->mutex);
+
+	entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+
+	if (!entry) {
+		if (!device->physical_device->disk_cache ||
+		    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
+			pthread_mutex_unlock(&cache->mutex);
+			return false;
+		}
+
+		uint8_t disk_sha1[20];
+		disk_cache_compute_key(device->physical_device->disk_cache,
+				       sha1, 20, disk_sha1);
+		entry = (struct cache_entry *)
+			disk_cache_get(device->physical_device->disk_cache,
+				       disk_sha1, NULL);
+		if (!entry) {
+			pthread_mutex_unlock(&cache->mutex);
+			return false;
+		} else {
+			size_t size = entry_size(entry);
+			struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
+								 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+			if (!new_entry) {
+				free(entry);
+				pthread_mutex_unlock(&cache->mutex);
+				return false;
+			}
+
+			memcpy(new_entry, entry, entry_size(entry));
+			free(entry);
+			entry = new_entry;
+
+			radv_pipeline_cache_add_entry(cache, new_entry);
+		}
+	}
+
+	char *p = entry->code;
+	for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
+		if (!entry->variants[i] && entry->code_sizes[i]) {
+			struct radv_shader_variant *variant;
+			struct cache_entry_variant_info info;
+
+			variant = calloc(1, sizeof(struct radv_shader_variant));
+			if (!variant) {
+				pthread_mutex_unlock(&cache->mutex);
+				return false;
+			}
+
+			memcpy(&info, p, sizeof(struct cache_entry_variant_info));
+			p += sizeof(struct cache_entry_variant_info);
+
+			variant->config = info.config;
+			variant->info = info.variant_info;
+			variant->rsrc1 = info.rsrc1;
+			variant->rsrc2 = info.rsrc2;
+			variant->code_size = entry->code_sizes[i];
+			variant->ref_count = 1;
+
+			void *ptr = radv_alloc_shader_memory(device, variant);
+			memcpy(ptr, p, entry->code_sizes[i]);
+			p += entry->code_sizes[i];
+
+			entry->variants[i] = variant;
+		} else if (entry->code_sizes[i]) {
+			p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
+		}
+
+	}
+
+	for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+		if (entry->variants[i])
+			p_atomic_inc(&entry->variants[i]->ref_count);
+
+	memcpy(variants, entry->variants, sizeof(entry->variants));
+	pthread_mutex_unlock(&cache->mutex);
+	return true;
+}
+
 void
 radv_pipeline_cache_insert_shaders(struct radv_device *device,
 				   struct radv_pipeline_cache *cache,
@@ -359,6 +375,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device,

 	char* p = entry->code;
 	struct cache_entry_variant_info info;
+	memset(&info, 0, sizeof(info));

 	for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
 		if (!variants[i])
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -83,7 +83,9 @@ typedef uint32_t xcb_window_t;
 #define MAX_SCISSORS    16
 #define MAX_PUSH_CONSTANTS_SIZE 128
 #define MAX_PUSH_DESCRIPTORS 32
-#define MAX_DYNAMIC_BUFFERS 16
+#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
+#define MAX_DYNAMIC_STORAGE_BUFFERS 8
+#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
 #define MAX_SAMPLES_LOG2 4
 #define NUM_META_FS_KEYS 13
 #define RADV_MAX_DRM_DEVICES 8
@@ -266,7 +268,7 @@ struct radv_physical_device {
 	struct radeon_winsys *ws;
 	struct radeon_info rad_info;
 	char                                        path[20];
-	const char *                                name;
+	char                                        name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
 	uint8_t                                     driver_uuid[VK_UUID_SIZE];
 	uint8_t                                     device_uuid[VK_UUID_SIZE];
 	uint8_t                                     cache_uuid[VK_UUID_SIZE];
@@ -282,6 +284,9 @@ struct radv_physical_device {
 	 * the pipeline cache defined by apps.
 	 */
 	struct disk_cache *                          disk_cache;
+
+	VkPhysicalDeviceMemoryProperties memory_properties;
+	enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
 };

 struct radv_instance {
@@ -345,6 +350,22 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device,
 				   const void *const *codes,
 				   const unsigned *code_sizes);

+enum radv_blit_ds_layout {
+	RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
+	RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
+	RADV_BLIT_DS_LAYOUT_COUNT,
+};
+
+static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout)
+{
+	return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+}
+
+static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
+{
+	return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+}
+
 struct radv_meta_state {
 	VkAllocationCallbacks alloc;

@@ -377,12 +398,12 @@ struct radv_meta_state {
 		/** Pipeline that blits from a 3D image. */
 		VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];

-		VkRenderPass depth_only_rp;
+		VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
 		VkPipeline depth_only_1d_pipeline;
 		VkPipeline depth_only_2d_pipeline;
 		VkPipeline depth_only_3d_pipeline;

-		VkRenderPass stencil_only_rp;
+		VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
 		VkPipeline stencil_only_1d_pipeline;
 		VkPipeline stencil_only_2d_pipeline;
 		VkPipeline stencil_only_3d_pipeline;
@@ -393,41 +414,46 @@ struct radv_meta_state {
 	struct {
 		VkRenderPass render_passes[NUM_META_FS_KEYS];

-		VkPipelineLayout p_layouts[2];
-		VkDescriptorSetLayout ds_layouts[2];
-		VkPipeline pipelines[2][NUM_META_FS_KEYS];
+		VkPipelineLayout p_layouts[3];
+		VkDescriptorSetLayout ds_layouts[3];
+		VkPipeline pipelines[3][NUM_META_FS_KEYS];

-		VkRenderPass depth_only_rp;
-		VkPipeline depth_only_pipeline[2];
+		VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+		VkPipeline depth_only_pipeline[3];

-		VkRenderPass stencil_only_rp;
-		VkPipeline stencil_only_pipeline[2];
+		VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+		VkPipeline stencil_only_pipeline[3];
 	} blit2d;

 	struct {
 		VkPipelineLayout                          img_p_layout;
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
+		VkPipeline pipeline_3d;
 	} itob;
 	struct {
 		VkPipelineLayout                          img_p_layout;
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
+		VkPipeline pipeline_3d;
 	} btoi;
 	struct {
 		VkPipelineLayout                          img_p_layout;
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
+		VkPipeline pipeline_3d;
 	} itoi;
 	struct {
 		VkPipelineLayout                          img_p_layout;
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
+		VkPipeline pipeline_3d;
 	} cleari;

 	struct {
-		VkPipeline                                pipeline;
-		VkRenderPass                              pass;
+		VkPipelineLayout                          p_layout;
+		VkPipeline                                pipeline[NUM_META_FS_KEYS];
+		VkRenderPass                              pass[NUM_META_FS_KEYS];
 	} resolve;

 	struct {
@@ -451,12 +477,14 @@ struct radv_meta_state {
 	} resolve_fragment;

 	struct {
+		VkPipelineLayout                          p_layout;
 		VkPipeline                                decompress_pipeline;
 		VkPipeline                                resummarize_pipeline;
 		VkRenderPass                              pass;
 	} depth_decomp[1 + MAX_SAMPLES_LOG2];

 	struct {
+		VkPipelineLayout                          p_layout;
 		VkPipeline                                cmask_eliminate_pipeline;
 		VkPipeline                                fmask_decompress_pipeline;
 		VkRenderPass                              pass;
@@ -892,7 +920,6 @@ void si_emit_wait_fence(struct radeon_winsys_cs *cs,
 			uint64_t va, uint32_t ref,
 			uint32_t mask);
 void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
-			    bool predicated,
 			    enum chip_class chip_class,
 			    uint32_t *fence_ptr, uint64_t va,
 			    bool is_mec,
@@ -1106,6 +1133,7 @@ struct radv_pipeline {
 			struct radv_gs_state gs;
 			uint32_t db_shader_control;
 			uint32_t shader_z_format;
+			uint32_t spi_baryc_cntl;
 			unsigned prim;
 			unsigned gs_out;
 			uint32_t vgt_gs_mode;
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -1152,7 +1152,7 @@ void radv_CmdEndQuery(
 		si_cs_emit_write_event_eop(cs,
 					   false,
 					   cmd_buffer->device->physical_device->rad_info.chip_class,
-					   false,
+					   radv_cmd_buffer_uses_mec(cmd_buffer),
 					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
 					   1, avail_va, 0, 1);
 		break;
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -110,6 +110,45 @@ void radv_DestroyShaderModule(
 	vk_free2(&device->alloc, pAllocator, module);
 }

+bool
+radv_lower_indirect_derefs(struct nir_shader *nir,
+                           struct radv_physical_device *device)
+{
+	/* While it would be nice not to have this flag, we are constrained
+	 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
+	 * on GFX9.
+	 */
+	bool llvm_has_working_vgpr_indexing =
+		device->rad_info.chip_class <= VI;
+
+	/* TODO: Indirect indexing of GS inputs is unimplemented.
+	 *
+	 * TCS and TES load inputs directly from LDS or offchip memory, so
+	 * indirect indexing is trivial.
+	 */
+	nir_variable_mode indirect_mask = 0;
+	if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+	    (nir->info.stage != MESA_SHADER_TESS_CTRL &&
+	     nir->info.stage != MESA_SHADER_TESS_EVAL &&
+	     !llvm_has_working_vgpr_indexing)) {
+		indirect_mask |= nir_var_shader_in;
+	}
+	if (!llvm_has_working_vgpr_indexing &&
+	    nir->info.stage != MESA_SHADER_TESS_CTRL)
+		indirect_mask |= nir_var_shader_out;
+
+	/* TODO: We shouldn't need to do this; however, LLVM isn't currently
+	 * smart enough to handle indirects without excess spilling, which
+	 * causes the GPU to hang.
+	 *
+	 * See the following thread for more details of the problem:
+	 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+	 */
+	indirect_mask |= nir_var_local;
+
+	return nir_lower_indirect_derefs(nir, indirect_mask);
+}
+
 void
 radv_optimize_nir(struct nir_shader *shader)
 {
@@ -245,40 +284,6 @@ radv_shader_compile_to_nir(struct radv_device *device,

 	nir_shader_gather_info(nir, entry_point->impl);

-	/* While it would be nice not to have this flag, we are constrained
-	 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
-	 * on GFX9.
-	 */
-	bool llvm_has_working_vgpr_indexing =
-		device->physical_device->rad_info.chip_class <= VI;
-
-	/* TODO: Indirect indexing of GS inputs is unimplemented.
-	 *
-	 * TCS and TES load inputs directly from LDS or offchip memory, so
-	 * indirect indexing is trivial.
-	 */
-	nir_variable_mode indirect_mask = 0;
-	if (nir->info.stage == MESA_SHADER_GEOMETRY ||
-	    (nir->info.stage != MESA_SHADER_TESS_CTRL &&
-	     nir->info.stage != MESA_SHADER_TESS_EVAL &&
-	     !llvm_has_working_vgpr_indexing)) {
-		indirect_mask |= nir_var_shader_in;
-	}
-	if (!llvm_has_working_vgpr_indexing &&
-	    nir->info.stage != MESA_SHADER_TESS_CTRL)
-		indirect_mask |= nir_var_shader_out;
-
-	/* TODO: We shouldn't need to do this, however LLVM isn't currently
-	 * smart enough to handle indirects without causing excess spilling
-	 * causing the gpu to hang.
-	 *
-	 * See the following thread for more details of the problem:
-	 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
-	 */
-	indirect_mask |= nir_var_local;
-
-	nir_lower_indirect_derefs(nir, indirect_mask);
-
 	static const nir_lower_tex_options tex_options = {
 	  .lower_txp = ~0,
 	};
@@ -289,11 +294,9 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	nir_lower_var_copies(nir);
 	nir_lower_global_vars_to_local(nir);
 	nir_remove_dead_variables(nir, nir_var_local);
+	radv_lower_indirect_derefs(nir, device->physical_device);
 	radv_optimize_nir(nir);

-	if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
-		nir_print_shader(nir, stderr);
-
 	return nir;
 }

--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -103,6 +103,10 @@ void
 radv_shader_variant_destroy(struct radv_device *device,
 			    struct radv_shader_variant *variant);

+bool
+radv_lower_indirect_derefs(struct nir_shader *nir,
+                           struct radv_physical_device *device);
+
 uint32_t
 radv_shader_stage_to_user_data_0(gl_shader_stage stage, enum chip_class chip_class,
 				 bool has_gs, bool has_tess);
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -194,12 +194,26 @@ radv_wsi_image_create(VkDevice device_h,
 		.image = image_h
 	};

+	/* Find the first VRAM memory type, or GART for PRIME images. */
+	int memory_type_index = -1;
+	for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+		bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+		if ((linear && !is_local) || (!linear && is_local)) {
+			memory_type_index = i;
+			break;
+		}
+	}
+
+	/* fallback */
+	if (memory_type_index == -1)
+		memory_type_index = 0;
+
 	result = radv_alloc_memory(device_h,
 				     &(VkMemoryAllocateInfo) {
 					     .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 					     .pNext = &ded_alloc,
 					     .allocationSize = image->size,
-					     .memoryTypeIndex = linear ? 1 : 0,
+					     .memoryTypeIndex = memory_type_index,
 				     },
 				     NULL /* XXX: pAllocator */,
 				     RADV_MEM_IMPLICIT_SYNC,
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -676,7 +676,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
 	int i;
 	float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
 	const float max_range = 32767.0f;
-	assert(count);
+	if (!count)
+		return;

 	radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
 	for (i = 0; i < count; i++) {
@@ -918,7 +919,6 @@ si_emit_acquire_mem(struct radeon_winsys_cs *cs,

 void
 si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
-		       bool predicated,
                       enum chip_class chip_class,
 		       uint32_t *flush_cnt,
 		       uint64_t flush_va,
@@ -949,7 +949,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 			/* Necessary for DCC */
 			if (chip_class >= VI) {
 				si_cs_emit_write_event_eop(cs,
-							   predicated,
+							   false,
 							   chip_class,
 							   is_mec,
 							   V_028A90_FLUSH_AND_INV_CB_DATA_TS,
@@ -963,12 +963,12 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 	}

 	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
 	}

 	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
 	}

@@ -981,13 +981,18 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 	}

 	if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 	}

 	if (chip_class >= GFX9 && flush_cb_db) {
 		unsigned cb_db_event, tc_flags;

+#if 0
+		/* Selecting a CB- or DB-specific flush event breaks a bunch of
+		 * dEQP-VK.renderpass.dedicated_allocation.formats.d32_sfloat_s8_uint.input*
+		 * tests, so always use the big hammer (the combined CB & DB event).
+		 */
 		/* Set the CB/DB flush event. */
 		switch (flush_cb_db) {
 		case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
@@ -1000,7 +1005,9 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 			/* both CB & DB */
 			cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
 		}
-
+#else
+		cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+#endif
 		/* TC    | TC_WB         = invalidate L2 data
 		 * TC_MD | TC_WB         = invalidate L2 metadata
 		 * TC    | TC_WB | TC_MD = invalidate L2 data & metadata
@@ -1028,14 +1035,14 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 		assert(flush_cnt);
 		uint32_t old_fence = (*flush_cnt)++;

-		si_cs_emit_write_event_eop(cs, predicated, chip_class, false, cb_db_event, tc_flags, 1,
+		si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1,
 					   flush_va, old_fence, *flush_cnt);
-		si_emit_wait_fence(cs, predicated, flush_va, *flush_cnt, 0xffffffff);
+		si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff);
 	}

 	/* VGT state sync */
 	if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
 	}

@@ -1048,13 +1055,13 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 			    RADV_CMD_FLAG_INV_GLOBAL_L2 |
 			    RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
 	    !is_mec) {
-		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, predicated));
+		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}

 	if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
 	    (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
-		si_emit_acquire_mem(cs, is_mec, predicated, chip_class >= GFX9,
+		si_emit_acquire_mem(cs, is_mec, false, chip_class >= GFX9,
 				    cp_coher_cntl |
 				    S_0085F0_TC_ACTION_ENA(1) |
 				    S_0085F0_TCL1_ACTION_ENA(1) |
@@ -1068,7 +1075,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 			 *
 			 * WB doesn't work without NC.
 			 */
-			si_emit_acquire_mem(cs, is_mec, predicated,
+			si_emit_acquire_mem(cs, is_mec, false,
 					    chip_class >= GFX9,
 					    cp_coher_cntl |
 					    S_0301F0_TC_WB_ACTION_ENA(1) |
@@ -1077,7 +1084,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 		}
 		if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
 			si_emit_acquire_mem(cs, is_mec,
-					    predicated, chip_class >= GFX9,
+					    false, chip_class >= GFX9,
 					    cp_coher_cntl |
 					    S_0085F0_TCL1_ACTION_ENA(1));
 			cp_coher_cntl = 0;
@@ -1088,7 +1095,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 	 * Therefore, it should be last. Done in PFP.
 	 */
 	if (cp_coher_cntl)
-		si_emit_acquire_mem(cs, is_mec, predicated, chip_class >= GFX9, cp_coher_cntl);
+		si_emit_acquire_mem(cs, is_mec, false, chip_class >= GFX9, cp_coher_cntl);
 }

 void
@@ -1118,7 +1125,6 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 		ptr = &cmd_buffer->gfx9_fence_idx;
 	}
 	si_cs_emit_cache_flush(cmd_buffer->cs,
-			       cmd_buffer->state.predicating,
 	                       cmd_buffer->device->physical_device->rad_info.chip_class,
 			       ptr, va,
 	                       radv_cmd_buffer_uses_mec(cmd_buffer),
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -85,6 +85,7 @@ LIBGLSL_FILES = \
 	glsl/lower_buffer_access.cpp \
 	glsl/lower_buffer_access.h \
 	glsl/lower_const_arrays_to_uniforms.cpp \
+	glsl/lower_cs_derived.cpp \
 	glsl/lower_discard.cpp \
 	glsl/lower_discard_flow.cpp \
 	glsl/lower_distance.cpp \
--- a/src/compiler/glsl/ast_function.cpp
+++ b/src/compiler/glsl/ast_function.cpp
@@ -227,19 +227,28 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
            val = ((ir_swizzle *)val)->val;
         }

-         while (val->ir_type == ir_type_dereference_array) {
-            val = ((ir_dereference_array *)val)->array;
+         for (;;) {
+            if (val->ir_type == ir_type_dereference_array) {
+               val = ((ir_dereference_array *)val)->array;
+            } else if (val->ir_type == ir_type_dereference_record &&
+                       !state->es_shader) {
+               val = ((ir_dereference_record *)val)->record;
+            } else
+               break;
         }

-         if (!val->as_dereference_variable() ||
-             val->variable_referenced()->data.mode != ir_var_shader_in) {
+         ir_variable *var = NULL;
+         if (const ir_dereference_variable *deref_var = val->as_dereference_variable())
+            var = deref_var->variable_referenced();
+
+         if (!var || var->data.mode != ir_var_shader_in) {
            _mesa_glsl_error(&loc, state,
                             "parameter `%s` must be a shader input",
                             formal->name);
            return false;
         }

-         val->variable_referenced()->data.must_be_shader_input = 1;
+         var->data.must_be_shader_input = 1;
      }

      /* Verify that 'out' and 'inout' actual parameters are lvalues. */
@@ -667,8 +676,13 @@ generate_array_index(void *mem_ctx, exec_list *instructions,
      ir_variable *sub_var = NULL;
      *function_name = array->primary_expression.identifier;

-      match_subroutine_by_name(*function_name, actual_parameters,
-                               state, &sub_var);
+      if (!match_subroutine_by_name(*function_name, actual_parameters,
+                                    state, &sub_var)) {
+         _mesa_glsl_error(&loc, state, "Unknown subroutine `%s'",
+                          *function_name);
+         *function_name = NULL; /* indicate error condition to caller */
+         return NULL;
+      }

      ir_rvalue *outer_array_idx = idx->hir(instructions, state);
      return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -5249,7 +5249,7 @@ ast_declarator_list::hir(exec_list *instructions,
                     if (var->type->fields.structure[i].type->is_array() ||
                         var->type->fields.structure[i].type->is_record())
                        _mesa_glsl_error(&loc, state,
-                                         "fragement shader input cannot have "
+                                         "fragment shader input cannot have "
                                         "a struct that contains an "
                                         "array or struct");
                  }
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -90,9 +90,9 @@ static const struct gl_builtin_uniform_element gl_LightSource_elements[] = {
 		  SWIZZLE_Y,
 		  SWIZZLE_Z,
 		  SWIZZLE_Z)},
-   {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW},
-   {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX},
   {"spotExponent", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW},
+   {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX},
+   {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW},
   {"constantAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX},
   {"linearAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY},
   {"quadraticAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ},
@@ -1295,15 +1295,10 @@ builtin_variable_generator::generate_cs_special_vars()
                       uvec3_t, "gl_LocalGroupSizeARB");
   }

-   if (state->ctx->Const.LowerCsDerivedVariables) {
-      add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
-      add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
-   } else {
-      add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
-                       uvec3_t, "gl_GlobalInvocationID");
-      add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
-                       uint_t, "gl_LocalInvocationIndex");
-   }
+   add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
+                    uvec3_t, "gl_GlobalInvocationID");
+   add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
+                    uint_t, "gl_LocalInvocationIndex");
 }


@@ -1474,84 +1469,3 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
      break;
   }
 }
-
-
-/**
- * Initialize compute shader variables with values that are derived from other
- * compute shader variable.
- */
-static void
-initialize_cs_derived_variables(gl_shader *shader,
-                                ir_function_signature *const main_sig)
-{
-   assert(shader->Stage == MESA_SHADER_COMPUTE);
-
-   ir_variable *gl_GlobalInvocationID =
-      shader->symbols->get_variable("gl_GlobalInvocationID");
-   assert(gl_GlobalInvocationID);
-   ir_variable *gl_WorkGroupID =
-      shader->symbols->get_variable("gl_WorkGroupID");
-   assert(gl_WorkGroupID);
-   ir_variable *gl_WorkGroupSize =
-      shader->symbols->get_variable("gl_WorkGroupSize");
-   if (gl_WorkGroupSize == NULL) {
-      void *const mem_ctx = ralloc_parent(shader->ir);
-      gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
-                                                  "gl_WorkGroupSize",
-                                                  ir_var_auto);
-      gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
-      gl_WorkGroupSize->data.read_only = true;
-      shader->ir->push_head(gl_WorkGroupSize);
-   }
-   ir_variable *gl_LocalInvocationID =
-      shader->symbols->get_variable("gl_LocalInvocationID");
-   assert(gl_LocalInvocationID);
-
-   /* gl_GlobalInvocationID =
-    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-    */
-   ir_instruction *inst =
-      assign(gl_GlobalInvocationID,
-             add(mul(gl_WorkGroupID, gl_WorkGroupSize),
-                 gl_LocalInvocationID));
-   main_sig->body.push_head(inst);
-
-   /* gl_LocalInvocationIndex =
-    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-    *    gl_LocalInvocationID.x;
-    */
-   ir_expression *index_z =
-      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
-          swizzle_y(gl_WorkGroupSize));
-   ir_expression *index_y =
-      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
-   ir_expression *index_y_plus_z = add(index_y, index_z);
-   operand index_x(swizzle_x(gl_LocalInvocationID));
-   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
-   ir_variable *gl_LocalInvocationIndex =
-      shader->symbols->get_variable("gl_LocalInvocationIndex");
-   assert(gl_LocalInvocationIndex);
-   inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
-   main_sig->body.push_head(inst);
-}
-
-
-/**
- * Initialize builtin variables with values based on other builtin variables.
- * These are initialized in the main function.
- */
-void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader)
-{
-   /* We only need to set CS variables currently. */
-   if (shader->Stage == MESA_SHADER_COMPUTE &&
-       ctx->Const.LowerCsDerivedVariables) {
-      ir_function_signature *const main_sig =
-         _mesa_get_main_function_signature(shader->symbols);
-
-      if (main_sig != NULL)
-         initialize_cs_derived_variables(shader, main_sig);
-   }
-}
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -1863,6 +1863,49 @@ set_shader_inout_layout(struct gl_shader *shader,
   shader->bound_image = state->bound_image_specified;
 }

+/* src can be NULL if only the symbols found in the exec_list should be
+ * copied
+ */
+void
+_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
+                                   struct glsl_symbol_table *src,
+                                   struct glsl_symbol_table *dest)
+{
+   foreach_in_list (ir_instruction, ir, shader_ir) {
+      switch (ir->ir_type) {
+      case ir_type_function:
+         dest->add_function((ir_function *) ir);
+         break;
+      case ir_type_variable: {
+         ir_variable *const var = (ir_variable *) ir;
+
+         if (var->data.mode != ir_var_temporary)
+            dest->add_variable(var);
+         break;
+      }
+      default:
+         break;
+      }
+   }
+
+   if (src != NULL) {
+      /* Explicitly copy the gl_PerVertex interface definitions because these
+       * are needed to check they are the same during the interstage link.
+       * They can’t necessarily be found via the exec_list because the members
+       * might not be referenced. The GL spec still requires that they match
+       * in that case.
+       */
+      const glsl_type *iface =
+         src->get_interface("gl_PerVertex", ir_var_shader_in);
+      if (iface)
+         dest->add_interface(iface->name, iface, ir_var_shader_in);
+
+      iface = src->get_interface("gl_PerVertex", ir_var_shader_out);
+      if (iface)
+         dest->add_interface(iface->name, iface, ir_var_shader_out);
+   }
+}
+
 extern "C" {

 static void
@@ -1936,6 +1979,7 @@ do_late_parsing_checks(struct _mesa_glsl_parse_state *state)

 static void
 opt_shader_and_create_symbol_table(struct gl_context *ctx,
+                                   struct glsl_symbol_table *source_symbols,
                                   struct gl_shader *shader)
 {
   assert(shader->CompileStatus != compile_failure &&
@@ -1993,24 +2037,8 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
    * We don't have to worry about types or interface-types here because those
    * are fly-weights that are looked up by glsl_type.
    */
-   foreach_in_list (ir_instruction, ir, shader->ir) {
-      switch (ir->ir_type) {
-      case ir_type_function:
-         shader->symbols->add_function((ir_function *) ir);
-         break;
-      case ir_type_variable: {
-         ir_variable *const var = (ir_variable *) ir;
-
-         if (var->data.mode != ir_var_temporary)
-            shader->symbols->add_variable(var);
-         break;
-      }
-      default:
-         break;
-      }
-   }
-
-   _mesa_glsl_initialize_derived_variables(ctx, shader);
+   _mesa_glsl_copy_symbols_from_table(shader->ir, source_symbols,
+                                      shader->symbols);
 }

 void
@@ -2047,7 +2075,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
         return;

      if (shader->CompileStatus == compiled_no_opts) {
-         opt_shader_and_create_symbol_table(ctx, shader);
+         opt_shader_and_create_symbol_table(ctx,
+                                            NULL, /* source_symbols */
+                                            shader);
         shader->CompileStatus = compile_success;
         return;
      }
@@ -2108,7 +2138,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
      lower_subroutine(shader->ir, state);

      if (!ctx->Cache || force_recompile)
-         opt_shader_and_create_symbol_table(ctx, shader);
+         opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
      else {
         reparent_ir(shader->ir, shader->ir);
         shader->CompileStatus = compiled_no_opts;
@@ -2221,6 +2251,24 @@ do_common_optimization(exec_list *ir, bool linked,
            loop_progress = false;
            loop_progress |= do_constant_propagation(ir);
            loop_progress |= do_if_simplification(ir);
+
+            /* Some drivers only call do_common_optimization() once rather
+             * than in a loop, so we must call do_lower_jumps() after
+             * unrolling a loop: for drivers that use LLVM, validation
+             * will fail if a jump is not the last instruction in the block.
+             * For example the following will fail LLVM validation:
+             *
+             *   (loop (
+             *      ...
+             *   break
+             *   (assign  (x) (var_ref v124)  (expression int + (var_ref v124)
+             *      (constant int (1)) ) )
+             *   ))
+             */
+            loop_progress |= do_lower_jumps(ir, true, true,
+                                            options->EmitNoMainReturn,
+                                            options->EmitNoCont,
+                                            options->EmitNoLoops);
         }
         progress |= loop_progress;
      }
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -948,6 +948,11 @@ extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
 extern void _mesa_destroy_shader_compiler(void);
 extern void _mesa_destroy_shader_compiler_caches(void);

+extern void
+_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
+                                   struct glsl_symbol_table *src,
+                                   struct glsl_symbol_table *dest);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -2412,10 +2412,6 @@ extern void
 _mesa_glsl_initialize_variables(exec_list *instructions,
 				struct _mesa_glsl_parse_state *state);

-extern void
-_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
-                                        gl_shader *shader);
-
 extern void
 reparent_ir(exec_list *list, void *mem_ctx);

--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -166,6 +166,7 @@ void optimize_dead_builtin_variables(exec_list *instructions,
 bool lower_tess_level(gl_linked_shader *shader);

 bool lower_vertex_id(gl_linked_shader *shader);
+bool lower_cs_derived(gl_linked_shader *shader);
 bool lower_blend_equation_advanced(gl_linked_shader *shader);

 bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
--- a/src/compiler/glsl/link_atomics.cpp
+++ b/src/compiler/glsl/link_atomics.cpp
@@ -207,7 +207,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
   active_atomic_buffer *abs =
      find_active_atomic_counters(ctx, prog, &num_buffers);

-   prog->data->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer,
+   prog->data->AtomicBuffers = rzalloc_array(prog->data, gl_active_atomic_buffer,
                                             num_buffers);
   prog->data->NumAtomicBuffers = num_buffers;

@@ -270,7 +270,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
         struct gl_program *gl_prog = prog->_LinkedShaders[j]->Program;
         gl_prog->info.num_abos = num_atomic_buffers[j];
         gl_prog->sh.AtomicBuffers =
-            rzalloc_array(prog, gl_active_atomic_buffer *,
+            rzalloc_array(gl_prog, gl_active_atomic_buffer *,
                          num_atomic_buffers[j]);

         unsigned intra_stage_idx = 0;
--- a/src/compiler/glsl/link_interface_blocks.cpp
+++ b/src/compiler/glsl/link_interface_blocks.cpp
@@ -364,6 +364,35 @@ validate_interstage_inout_blocks(struct gl_shader_program *prog,
                                   consumer->Stage != MESA_SHADER_FRAGMENT) ||
                                  consumer->Stage == MESA_SHADER_GEOMETRY;

+   /* Check that block re-declarations of gl_PerVertex are compatible
+    * across shaders: From OpenGL Shading Language 4.5, section
+    * "7.1 Built-In Language Variables", page 130 of the PDF:
+    *
+    *    "If multiple shaders using members of a built-in block belonging
+    *     to the same interface are linked together in the same program,
+    *     they must all redeclare the built-in block in the same way, as
+    *     described in section 4.3.9 “Interface Blocks” for interface-block
+    *     matching, or a link-time error will result."
+    *
+    * This is done explicitly outside of iterating the member variable
+    * declarations because it is possible that the variables are not used and
+    * so they would have been optimised out.
+    */
+   const glsl_type *consumer_iface =
+      consumer->symbols->get_interface("gl_PerVertex",
+                                       ir_var_shader_in);
+
+   const glsl_type *producer_iface =
+      producer->symbols->get_interface("gl_PerVertex",
+                                       ir_var_shader_out);
+
+   if (producer_iface && consumer_iface &&
+       interstage_member_mismatch(prog, consumer_iface, producer_iface)) {
+      linker_error(prog, "Incompatible or missing gl_PerVertex re-declaration "
+                   "in consecutive shaders");
+      return;
+   }
+
   /* Add output interfaces from the producer to the symbol table. */
   foreach_in_list(ir_instruction, node, producer->ir) {
      ir_variable *var = node->as_variable();
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -1333,7 +1333,7 @@ link_assign_uniform_storage(struct gl_context *ctx,

   union gl_constant_value *data;
   if (prog->data->UniformStorage == NULL) {
-      prog->data->UniformStorage = rzalloc_array(prog,
+      prog->data->UniformStorage = rzalloc_array(prog->data,
                                                 struct gl_uniform_storage,
                                                 prog->data->NumUniformStorage);
      data = rzalloc_array(prog->data->UniformStorage,
@@ -1400,13 +1400,6 @@ link_assign_uniform_storage(struct gl_context *ctx,
             sizeof(shader->Program->sh.SamplerTargets));
   }

-   /* If this is a fallback compile for a cache miss we already have the
-    * correct uniform mappings and we don't want to reinitialise uniforms so
-    * just return now.
-    */
-   if (prog->data->cache_fallback)
-      return;
-
 #ifndef NDEBUG
   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
      assert(prog->data->UniformStorage[i].storage != NULL ||
@@ -1431,11 +1424,9 @@ void
 link_assign_uniform_locations(struct gl_shader_program *prog,
                              struct gl_context *ctx)
 {
-   if (!prog->data->cache_fallback) {
-      ralloc_free(prog->data->UniformStorage);
-      prog->data->UniformStorage = NULL;
-      prog->data->NumUniformStorage = 0;
-   }
+   ralloc_free(prog->data->UniformStorage);
+   prog->data->UniformStorage = NULL;
+   prog->data->NumUniformStorage = 0;

   if (prog->UniformHash != NULL) {
      prog->UniformHash->clear();
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -165,10 +165,12 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,

         if (var->data.from_named_ifc_block) {
            type = var->get_interface_type();
+
            /* Find the member type before it was altered by lowering */
+            const glsl_type *type_wa = type->without_array();
            member_type =
-               type->fields.structure[type->field_index(var->name)].type;
-            name = ralloc_strdup(NULL, type->without_array()->name);
+               type_wa->fields.structure[type_wa->field_index(var->name)].type;
+            name = ralloc_strdup(NULL, type_wa->name);
         } else {
            type = var->type;
            member_type = NULL;
@@ -189,7 +191,8 @@ process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
 * matching input to another stage.
 */
 static void
-cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
+cross_validate_types_and_qualifiers(struct gl_context *ctx,
+                                    struct gl_shader_program *prog,
                                    const ir_variable *input,
                                    const ir_variable *output,
                                    gl_shader_stage consumer_stage,
@@ -343,17 +346,30 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
   }
   if (input_interpolation != output_interpolation &&
       prog->data->Version < 440) {
-      linker_error(prog,
-                   "%s shader output `%s' specifies %s "
-                   "interpolation qualifier, "
-                   "but %s shader input specifies %s "
-                   "interpolation qualifier\n",
-                   _mesa_shader_stage_to_string(producer_stage),
-                   output->name,
-                   interpolation_string(output->data.interpolation),
-                   _mesa_shader_stage_to_string(consumer_stage),
-                   interpolation_string(input->data.interpolation));
-      return;
+      if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
+         linker_error(prog,
+                      "%s shader output `%s' specifies %s "
+                      "interpolation qualifier, "
+                      "but %s shader input specifies %s "
+                      "interpolation qualifier\n",
+                      _mesa_shader_stage_to_string(producer_stage),
+                      output->name,
+                      interpolation_string(output->data.interpolation),
+                      _mesa_shader_stage_to_string(consumer_stage),
+                      interpolation_string(input->data.interpolation));
+         return;
+      } else {
+         linker_warning(prog,
+                        "%s shader output `%s' specifies %s "
+                        "interpolation qualifier, "
+                        "but %s shader input specifies %s "
+                        "interpolation qualifier\n",
+                        _mesa_shader_stage_to_string(producer_stage),
+                        output->name,
+                        interpolation_string(output->data.interpolation),
+                        _mesa_shader_stage_to_string(consumer_stage),
+                        interpolation_string(input->data.interpolation));
+      }
   }
 }

@@ -361,7 +377,8 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
 * Validate front and back color outputs against single color input
 */
 static void
-cross_validate_front_and_back_color(struct gl_shader_program *prog,
+cross_validate_front_and_back_color(struct gl_context *ctx,
+                                    struct gl_shader_program *prog,
                                    const ir_variable *input,
                                    const ir_variable *front_color,
                                    const ir_variable *back_color,
@@ -369,11 +386,11 @@ cross_validate_front_and_back_color(struct gl_shader_program *prog,
                                    gl_shader_stage producer_stage)
 {
   if (front_color != NULL && front_color->data.assigned)
-      cross_validate_types_and_qualifiers(prog, input, front_color,
+      cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
                                          consumer_stage, producer_stage);

   if (back_color != NULL && back_color->data.assigned)
-      cross_validate_types_and_qualifiers(prog, input, back_color,
+      cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
                                          consumer_stage, producer_stage);
 }

@@ -526,7 +543,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
         const ir_variable *const back_color =
            parameters.get_variable("gl_BackColor");

-         cross_validate_front_and_back_color(prog, input,
+         cross_validate_front_and_back_color(ctx, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
@@ -536,7 +553,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
         const ir_variable *const back_color =
            parameters.get_variable("gl_BackSecondaryColor");

-         cross_validate_front_and_back_color(prog, input,
+         cross_validate_front_and_back_color(ctx, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else {
@@ -579,7 +596,7 @@ cross_validate_outputs_to_inputs(struct gl_context *ctx,
             */
            if (!(input->get_interface_type() &&
                  output->get_interface_type()))
-               cross_validate_types_and_qualifiers(prog, input, output,
+               cross_validate_types_and_qualifiers(ctx, prog, input, output,
                                                   consumer->Stage,
                                                   producer->Stage);
         } else {
@@ -1171,7 +1188,6 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
-               buffers |= 1 << j;
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
@@ -1196,10 +1212,24 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
            num_buffers++;
            buffer_stream_id = -1;
            continue;
-         } else if (tfeedback_decls[i].is_varying()) {
+         }
+
+         if (has_xfb_qualifiers) {
+            buffer = tfeedback_decls[i].get_buffer();
+         } else {
+            buffer = num_buffers;
+         }
+
+         if (tfeedback_decls[i].is_varying()) {
            if (buffer_stream_id == -1)  {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
+
+               /* Only mark a buffer as active when there is a varying
+                * attached to it. This behaviour is based on a revised version
+                * of section 13.2.2 of the GL 4.6 spec.
+                */
+               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) tfeedback_decls[i].get_stream_id()) {
               /* Varying writes to the same buffer from a different stream */
@@ -1215,13 +1245,6 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
            }
         }

-         if (has_xfb_qualifiers) {
-            buffer = tfeedback_decls[i].get_buffer();
-         } else {
-            buffer = num_buffers;
-         }
-         buffers |= 1 << buffer;
-
         if (!tfeedback_decls[i].store(ctx, prog,
                                       xfb_prog->sh.LinkedTransformFeedback,
                                       buffer, num_buffers, num_outputs,
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -1111,20 +1111,21 @@ cross_validate_globals(struct gl_shader_program *prog,
            return;
         }

-         /* Only in GLSL ES 3.10, the precision qualifier should not match
-          * between block members defined in matched block names within a
-          * shader interface.
-          *
-          * In GLSL ES 3.00 and ES 3.20, precision qualifier for each block
-          * member should match.
+         /* Check the precision qualifier matches for uniform variables on
+          * GLSL ES.
          */
-         if (prog->IsES && (prog->data->Version != 310 ||
-                            !var->get_interface_type()) &&
+         if (prog->IsES && !var->get_interface_type() &&
             existing->data.precision != var->data.precision) {
-            linker_error(prog, "declarations for %s `%s` have "
-                         "mismatching precision qualifiers\n",
-                         mode_string(var), var->name);
-            return;
+            if ((existing->data.used && var->data.used) || prog->data->Version >= 300) {
+               linker_error(prog, "declarations for %s `%s` have "
+                            "mismatching precision qualifiers\n",
+                            mode_string(var), var->name);
+               return;
+            } else {
+               linker_warning(prog, "declarations for %s `%s` have "
+                              "mismatching precision qualifiers\n",
+                              mode_string(var), var->name);
+            }
         }
      } else
         variables->add_variable(var);
@@ -1195,8 +1196,8 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
      }

      for (unsigned int j = 0; j < sh_num_blocks; j++) {
-         int index = link_cross_validate_uniform_block(prog, &blks, num_blks,
-                                                       sh_blks[j]);
+         int index = link_cross_validate_uniform_block(prog->data, &blks,
+                                                       num_blks, sh_blks[j]);

         if (index == -1) {
            linker_error(prog, "buffer block `%s' has mismatching "
@@ -1255,21 +1256,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
 * Populates a shaders symbol table with all global declarations
 */
 static void
-populate_symbol_table(gl_linked_shader *sh)
+populate_symbol_table(gl_linked_shader *sh, glsl_symbol_table *symbols)
 {
   sh->symbols = new(sh) glsl_symbol_table;

-   foreach_in_list(ir_instruction, inst, sh->ir) {
-      ir_variable *var;
-      ir_function *func;
-
-      if ((func = inst->as_function()) != NULL) {
-         sh->symbols->add_function(func);
-      } else if ((var = inst->as_variable()) != NULL) {
-         if (var->data.mode != ir_var_temporary)
-            sh->symbols->add_variable(var);
-      }
-   }
+   _mesa_glsl_copy_symbols_from_table(sh->ir, symbols, sh->symbols);
 }


@@ -2268,8 +2259,7 @@ link_intrastage_shaders(void *mem_ctx,
      return NULL;
   }

-   if (!prog->data->cache_fallback)
-      _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);
+   _mesa_reference_shader_program_data(ctx, &gl_prog->sh.data, prog->data);

   /* Don't use _mesa_reference_program() just take ownership */
   linked->Program = gl_prog;
@@ -2288,7 +2278,7 @@ link_intrastage_shaders(void *mem_ctx,

   link_bindless_layout_qualifiers(prog, shader_list, num_shaders);

-   populate_symbol_table(linked);
+   populate_symbol_table(linked, shader_list[0]->symbols);

   /* The pointer to the main function in the final linked shader (i.e., the
    * copy of the original shader that contained the main function).
@@ -2326,35 +2316,33 @@ link_intrastage_shaders(void *mem_ctx,
   v.run(linked->ir);
   v.fixup_unnamed_interface_types();

-   if (!prog->data->cache_fallback) {
-      /* Link up uniform blocks defined within this stage. */
-      link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
-                          &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);
+   /* Link up uniform blocks defined within this stage. */
+   link_uniform_blocks(mem_ctx, ctx, prog, linked, &ubo_blocks,
+                       &num_ubo_blocks, &ssbo_blocks, &num_ssbo_blocks);

-      if (!prog->data->LinkStatus) {
-         _mesa_delete_linked_shader(ctx, linked);
-         return NULL;
-      }
-
-      /* Copy ubo blocks to linked shader list */
-      linked->Program->sh.UniformBlocks =
-         ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
-      ralloc_steal(linked, ubo_blocks);
-      for (unsigned i = 0; i < num_ubo_blocks; i++) {
-         linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
-      }
-      linked->Program->info.num_ubos = num_ubo_blocks;
-
-      /* Copy ssbo blocks to linked shader list */
-      linked->Program->sh.ShaderStorageBlocks =
-         ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
-      ralloc_steal(linked, ssbo_blocks);
-      for (unsigned i = 0; i < num_ssbo_blocks; i++) {
-         linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
-      }
-      linked->Program->info.num_ssbos = num_ssbo_blocks;
+   if (!prog->data->LinkStatus) {
+      _mesa_delete_linked_shader(ctx, linked);
+      return NULL;
   }

+   /* Copy ubo blocks to linked shader list */
+   linked->Program->sh.UniformBlocks =
+      ralloc_array(linked, gl_uniform_block *, num_ubo_blocks);
+   ralloc_steal(linked, ubo_blocks);
+   for (unsigned i = 0; i < num_ubo_blocks; i++) {
+      linked->Program->sh.UniformBlocks[i] = &ubo_blocks[i];
+   }
+   linked->Program->info.num_ubos = num_ubo_blocks;
+
+   /* Copy ssbo blocks to linked shader list */
+   linked->Program->sh.ShaderStorageBlocks =
+      ralloc_array(linked, gl_uniform_block *, num_ssbo_blocks);
+   ralloc_steal(linked, ssbo_blocks);
+   for (unsigned i = 0; i < num_ssbo_blocks; i++) {
+      linked->Program->sh.ShaderStorageBlocks[i] = &ssbo_blocks[i];
+   }
+   linked->Program->info.num_ssbos = num_ssbo_blocks;
+
   /* At this point linked should contain all of the linked IR, so
    * validate it to make sure nothing went wrong.
    */
@@ -2374,6 +2362,9 @@ link_intrastage_shaders(void *mem_ctx,
   if (ctx->Const.VertexID_is_zero_based)
      lower_vertex_id(linked);

+   if (ctx->Const.LowerCsDerivedVariables)
+      lower_cs_derived(linked);
+
 #ifdef DEBUG
   /* Compute the source checksum. */
   linked->SourceChecksum = 0;
@@ -3614,7 +3605,7 @@ add_program_resource(struct gl_shader_program *prog,
      return true;

   prog->data->ProgramResourceList =
-      reralloc(prog,
+      reralloc(prog->data,
               prog->data->ProgramResourceList,
               gl_program_resource,
               prog->data->NumProgramResourceList + 1);
@@ -3809,6 +3800,7 @@ add_shader_variable(const struct gl_context *ctx,
                    GLenum programInterface, ir_variable *var,
                    const char *name, const glsl_type *type,
                    bool use_implicit_location, int location,
+                    bool inouts_share_location,
                    const glsl_type *outermost_struct_type = NULL)
 {
   const glsl_type *interface_type = var->get_interface_type();
@@ -3871,7 +3863,7 @@ add_shader_variable(const struct gl_context *ctx,
                                  stage_mask, programInterface,
                                  var, field_name, field->type,
                                  use_implicit_location, field_location,
-                                  outermost_struct_type))
+                                  false, outermost_struct_type))
            return false;

         field_location += field->type->count_attribute_slots(false);
@@ -3879,6 +3871,43 @@ add_shader_variable(const struct gl_context *ctx,
      return true;
   }

+   case GLSL_TYPE_ARRAY: {
+      /* The ARB_program_interface_query spec says:
+       *
+       *     "For an active variable declared as an array of basic types, a
+       *      single entry will be generated, with its name string formed by
+       *      concatenating the name of the array and the string "[0]"."
+       *
+       *     "For an active variable declared as an array of an aggregate data
+       *      type (structures or arrays), a separate entry will be generated
+       *      for each active array element, unless noted immediately below.
+       *      The name of each entry is formed by concatenating the name of
+       *      the array, the "[" character, an integer identifying the element
+       *      number, and the "]" character.  These enumeration rules are
+       *      applied recursively, treating each enumerated array element as a
+       *      separate active variable."
+       */
+      const struct glsl_type *array_type = type->fields.array;
+      if (array_type->base_type == GLSL_TYPE_STRUCT ||
+          array_type->base_type == GLSL_TYPE_ARRAY) {
+         unsigned elem_location = location;
+         unsigned stride = inouts_share_location ? 0 :
+                           array_type->count_attribute_slots(false);
+         for (unsigned i = 0; i < type->length; i++) {
+            char *elem = ralloc_asprintf(shProg, "%s[%d]", name, i);
+            if (!add_shader_variable(ctx, shProg, resource_set,
+                                     stage_mask, programInterface,
+                                     var, elem, array_type,
+                                     use_implicit_location, elem_location,
+                                     false, outermost_struct_type))
+               return false;
+            elem_location += stride;
+         }
+         return true;
+      }
+      /* fallthrough */
+   }
+
   default: {
      /* The ARB_program_interface_query spec says:
       *
@@ -3899,6 +3928,20 @@ add_shader_variable(const struct gl_context *ctx,
   }
 }

+static bool
+inout_has_same_location(const ir_variable *var, unsigned stage)
+{
+   /* True only for non-patch TCS outputs and non-patch TCS/TES/GS inputs. */
+   if (var->data.patch)
+      return false;
+
+   if (var->data.mode == ir_var_shader_out)
+      return stage == MESA_SHADER_TESS_CTRL;
+   return var->data.mode == ir_var_shader_in &&
+          (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+           stage == MESA_SHADER_GEOMETRY);
+}
+
 static bool
 add_interface_variables(const struct gl_context *ctx,
                        struct gl_shader_program *shProg,
@@ -3955,7 +3998,8 @@ add_interface_variables(const struct gl_context *ctx,
      if (!add_shader_variable(ctx, shProg, resource_set,
                               1 << stage, programInterface,
                               var, var->name, var->type, vs_input_or_fs_output,
-                               var->data.location - loc_bias))
+                               var->data.location - loc_bias,
+                               inout_has_same_location(var, stage)))
         return false;
   }
   return true;
@@ -3993,7 +4037,8 @@ add_packed_varyings(const struct gl_context *ctx,
            if (!add_shader_variable(ctx, shProg, resource_set,
                                     stage_mask,
                                     iface, var, var->name, var->type, false,
-                                     var->data.location - VARYING_SLOT_VAR0))
+                                     var->data.location - VARYING_SLOT_VAR0,
+                                     inout_has_same_location(var, stage)))
               return false;
         }
      }
@@ -4019,7 +4064,8 @@ add_fragdata_arrays(const struct gl_context *ctx,
         if (!add_shader_variable(ctx, shProg, resource_set,
                                  1 << MESA_SHADER_FRAGMENT,
                                  GL_PROGRAM_OUTPUT, var, var->name, var->type,
-                                  true, var->data.location - FRAG_RESULT_DATA0))
+                                  true, var->data.location - FRAG_RESULT_DATA0,
+                                  false))
            return false;
      }
   }
@@ -4585,14 +4631,12 @@ link_and_validate_uniforms(struct gl_context *ctx,
   update_array_sizes(prog);
   link_assign_uniform_locations(prog, ctx);

-   if (!prog->data->cache_fallback) {
-      link_assign_atomic_counter_resources(ctx, prog);
-      link_calculate_subroutine_compat(prog);
-      check_resources(ctx, prog);
-      check_subroutine_resources(prog);
-      check_image_resources(ctx, prog);
-      link_check_atomic_counter_resources(ctx, prog);
-   }
+   link_assign_atomic_counter_resources(ctx, prog);
+   link_calculate_subroutine_compat(prog);
+   check_resources(ctx, prog);
+   check_subroutine_resources(prog);
+   check_image_resources(ctx, prog);
+   link_check_atomic_counter_resources(ctx, prog);
 }

 static bool
@@ -4906,10 +4950,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
      last = i;
   }

-   if (!prog->data->cache_fallback) {
-      check_explicit_uniform_locations(ctx, prog);
-      link_assign_subroutine_types(prog);
-   }
+   check_explicit_uniform_locations(ctx, prog);
+   link_assign_subroutine_types(prog);

   if (!prog->data->LinkStatus)
      goto done;
@@ -4964,15 +5006,13 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
   if (prog->SeparateShader)
      disable_varying_optimizations_for_sso(prog);

-   if (!prog->data->cache_fallback) {
-      /* Process UBOs */
-      if (!interstage_cross_validate_uniform_blocks(prog, false))
-         goto done;
+   /* Process UBOs */
+   if (!interstage_cross_validate_uniform_blocks(prog, false))
+      goto done;

-      /* Process SSBOs */
-      if (!interstage_cross_validate_uniform_blocks(prog, true))
-         goto done;
-   }
+   /* Process SSBOs */
+   if (!interstage_cross_validate_uniform_blocks(prog, true))
+      goto done;

   /* Do common optimization before assigning storage for attributes,
    * uniforms, and varyings.  Later optimization could possibly make
--- a/src/compiler/glsl/loop_unroll.cpp
+++ b/src/compiler/glsl/loop_unroll.cpp
@@ -519,7 +519,7 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
    * isn't any additional unknown terminators, or any other jumps nested
    * inside futher ifs.
    */
-   if (ls->num_loop_jumps != 2)
+   if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2)
      return visit_continue;

   ir_instruction *first_ir =
@@ -528,8 +528,6 @@ loop_unroll_visitor::visit_leave(ir_loop *ir)
   unsigned term_count = 0;
   bool first_term_then_continue = false;
   foreach_in_list(loop_terminator, t, &ls->terminators) {
-      assert(term_count < 2);
-
      ir_if *ir_if = t->ir->as_if();
      assert(ir_if != NULL);

--- a/src/compiler/glsl/lower_buffer_access.cpp
+++ b/src/compiler/glsl/lower_buffer_access.cpp
@@ -72,16 +72,22 @@ lower_buffer_access::emit_access(void *mem_ctx,
            new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
                                               field->name);

-         field_offset =
-            glsl_align(field_offset,
-                       field->type->std140_base_alignment(row_major));
+         unsigned field_align;
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            field_align = field->type->std430_base_alignment(row_major);
+         else
+            field_align = field->type->std140_base_alignment(row_major);
+         field_offset = glsl_align(field_offset, field_align);

         emit_access(mem_ctx, is_write, field_deref, base_offset,
                     deref_offset + field_offset,
                     row_major, 1, packing,
                     writemask_for_size(field_deref->type->vector_elements));

-         field_offset += field->type->std140_size(row_major);
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            field_offset += field->type->std430_size(row_major);
+         else
+            field_offset += field->type->std140_size(row_major);
      }
      return;
   }
--- a/src/compiler/glsl/lower_cs_derived.cpp
+++ b/src/compiler/glsl/lower_cs_derived.cpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright © 2017 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_cs_derived.cpp
+ *
+ * For hardware that does not support the gl_GlobalInvocationID and
+ * gl_LocalInvocationIndex system values, replace them with fresh
+ * globals. Note that we can't rely on gl_WorkGroupSize or
+ * gl_LocalGroupSizeARB being available, since they may only have been defined
+ * in a non-main shader.
+ *
+ * [ This can happen if only a secondary shader has the layout(local_size_*)
+ *   declaration. ]
+ *
+ * This is meant to be run post-linking.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
+#include "program/prog_statevars.h"
+#include "builtin_functions.h"
+
+using namespace ir_builder;
+
+namespace {
+
+class lower_cs_derived_visitor : public ir_hierarchical_visitor {
+public:
+   explicit lower_cs_derived_visitor(gl_linked_shader *shader)
+      : progress(false),
+        shader(shader),
+        local_size_variable(shader->Program->info.cs.local_size_variable),
+        gl_WorkGroupSize(NULL),
+        gl_WorkGroupID(NULL),
+        gl_LocalInvocationID(NULL),
+        gl_GlobalInvocationID(NULL),
+        gl_LocalInvocationIndex(NULL)
+   {
+      main_sig = _mesa_get_main_function_signature(shader->symbols);
+      assert(main_sig); /* the initializing assignments are pushed into main */
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+
+   ir_variable *add_system_value( /* creates a sysval and pushes it to the IR head */
+         int slot, const glsl_type *type, const char *name);
+   void find_sysvals(); /* looks up or re-creates the inputs of both formulas */
+   void make_gl_GlobalInvocationID(); /* lazily creates the replacement temporary */
+   void make_gl_LocalInvocationIndex(); /* lazily creates the replacement temporary */
+
+   bool progress; /* set to true by visit() whenever a dereference is rewritten */
+
+private:
+   gl_linked_shader *shader; /* shader being lowered; also the allocation context */
+   bool local_size_variable; /* copy of Program->info.cs.local_size_variable */
+   ir_function_signature *main_sig; /* main(), receives the initial assignments */
+
+   ir_rvalue *gl_WorkGroupSize; /* dereference or constant; cloned at each use */
+   ir_variable *gl_WorkGroupID; /* found in the symbol table or re-created */
+   ir_variable *gl_LocalInvocationID; /* found in the symbol table or re-created */
+
+   ir_variable *gl_GlobalInvocationID; /* temporary "__GlobalInvocationID" */
+   ir_variable *gl_LocalInvocationIndex; /* temporary "__LocalInvocationIndex" */
+};
+
+} /* anonymous namespace */
+
+ir_variable *
+lower_cs_derived_visitor::add_system_value(
+      int slot, const glsl_type *type, const char *name)
+{
+   /* Implicitly declared, read-only system value placed at the IR head. */
+   ir_variable *sysval = new(shader) ir_variable(type, name, ir_var_system_value);
+   sysval->data.explicit_location = true;
+   sysval->data.location = slot;
+   sysval->data.explicit_index = 0;
+   sysval->data.read_only = true;
+   sysval->data.how_declared = ir_var_declared_implicitly;
+   shader->ir->push_head(sysval);
+   return sysval;
+}
+
+void
+lower_cs_derived_visitor::find_sysvals()
+{
+   /* gl_WorkGroupSize is always set below, so it also marks "done". */
+   if (gl_WorkGroupSize != NULL)
+      return;
+
+   const char *const size_name =
+      local_size_variable ? "gl_LocalGroupSizeARB" : "gl_WorkGroupSize";
+   ir_variable *const size_var = shader->symbols->get_variable(size_name);
+
+   gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
+   gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
+
+   /*
+    * Any of these may be missing, either due to dead code elimination or,
+    * in the case of the group size, due to the layout being declared in a
+    * non-main shader.  Re-create whatever was not found.
+    */
+   if (gl_WorkGroupID == NULL)
+      gl_WorkGroupID = add_system_value(
+            SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
+
+   if (gl_LocalInvocationID == NULL)
+      gl_LocalInvocationID = add_system_value(
+            SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
+            "gl_LocalInvocationID");
+
+   if (size_var != NULL) {
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(size_var);
+   } else if (local_size_variable) {
+      ir_variable *const size_sysval = add_system_value(
+            SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
+            "gl_LocalGroupSizeARB");
+      gl_WorkGroupSize = new(shader) ir_dereference_variable(size_sysval);
+   } else {
+      /* Non-variable size: build a uvec3 constant from info.cs.local_size. */
+      ir_constant_data data;
+      memset(&data, 0, sizeof(data));
+      for (int i = 0; i < 3; i++)
+         data.u[i] = shader->Program->info.cs.local_size[i];
+      gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
+   }
+}
+
+void
+lower_cs_derived_visitor::make_gl_GlobalInvocationID()
+{
+   if (gl_GlobalInvocationID != NULL)
+      return;
+
+   find_sysvals();
+
+   /* Replacement temporary, pushed to the head of the shader's IR list. */
+   gl_GlobalInvocationID = new(shader) ir_variable(
+         glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
+   shader->ir->push_head(gl_GlobalInvocationID);
+
+   /* Prepend to main():  __GlobalInvocationID =
+    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
+    */
+   ir_expression *const group_base =
+      mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL));
+   main_sig->body.push_head(
+      assign(gl_GlobalInvocationID, add(group_base, gl_LocalInvocationID)));
+}
+
+void
+lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
+{
+   if (gl_LocalInvocationIndex != NULL)
+      return;
+
+   find_sysvals();
+
+   gl_LocalInvocationIndex = new(shader) ir_variable(
+         glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
+   shader->ir->push_head(gl_LocalInvocationIndex);
+
+   /* Prepend to main():  __LocalInvocationIndex =
+    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
+    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
+    *    gl_LocalInvocationID.x
+    */
+   ir_expression *const z_term =
+      mul(mul(swizzle_z(gl_LocalInvocationID),
+              swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
+          swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *const y_term =
+      mul(swizzle_y(gl_LocalInvocationID),
+          swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
+   ir_expression *const yz_sum = add(y_term, z_term);
+   main_sig->body.push_head(
+      assign(gl_LocalInvocationIndex,
+             add(yz_sum, swizzle_x(gl_LocalInvocationID))));
+}
+
+ir_visitor_status
+lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
+{
+   /* Only reads of system values are candidates for replacement. */
+   if (ir->var->data.mode != ir_var_system_value)
+      return visit_continue;
+
+   const int slot = ir->var->data.location;
+   if (slot == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
+      make_gl_GlobalInvocationID();
+      ir->var = gl_GlobalInvocationID;
+      progress = true;
+   } else if (slot == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
+      make_gl_LocalInvocationIndex();
+      ir->var = gl_LocalInvocationIndex;
+      progress = true;
+   }
+   return visit_continue;
+}
+
+bool
+lower_cs_derived(gl_linked_shader *shader)
+{
+   /* Nothing to do for non-compute stages; otherwise report the progress. */
+   if (shader->Stage == MESA_SHADER_COMPUTE) {
+      lower_cs_derived_visitor v(shader);
+      v.run(shader->ir);
+      return v.progress;
+   }
+   return false;
+}
--- a/src/compiler/glsl/lower_named_interface_blocks.cpp
+++ b/src/compiler/glsl/lower_named_interface_blocks.cpp
@@ -115,6 +115,7 @@ public:
   void run(exec_list *instructions);

   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual ir_visitor_status visit_leave(ir_expression *);
   virtual void handle_rvalue(ir_rvalue **rvalue);
 };

@@ -238,6 +239,23 @@ flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir)
   return rvalue_visit(ir);
 }

+ir_visitor_status
+flatten_named_interface_blocks_declarations::visit_leave(ir_expression *ir)
+{
+   const ir_visitor_status result = rvalue_visit(ir);
+   switch (ir->operation) {
+   case ir_unop_interpolate_at_centroid:
+   case ir_binop_interpolate_at_offset:
+   case ir_binop_interpolate_at_sample:
+      /* Flagging the interpolant disables varying packing for this input. */
+      ir->operands[0]->variable_referenced()->data.must_be_shader_input = 1;
+      break;
+   default:
+      break;
+   }
+   return result;
+}
+
 void
 flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
 {
--- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp
@@ -128,7 +128,36 @@ ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rv
 {
   ir_expression *const expr = ir->as_expression();

-   if (expr == NULL || expr->operation != ir_binop_vector_extract)
+   if (expr == NULL)
+      return ir;
+
+   if (expr->operation == ir_unop_interpolate_at_centroid ||
+       expr->operation == ir_binop_interpolate_at_offset ||
+       expr->operation == ir_binop_interpolate_at_sample) {
+      /* Lower interpolateAtXxx(some_vec[idx], ...) to
+       * interpolateAtXxx(some_vec, ...)[idx] before lowering to conditional
+       * assignments, to maintain the rule that the interpolant is an l-value
+       * referring to a (part of a) shader input.
+       *
+       * This is required when idx is dynamic (otherwise it gets lowered to
+       * a swizzle).
+       */
+      ir_expression *const interpolant = expr->operands[0]->as_expression();
+      if (!interpolant || interpolant->operation != ir_binop_vector_extract)
+         return ir;
+
+      ir_rvalue *vec_input = interpolant->operands[0];
+      ir_expression *const vec_interpolate =
+         new(base_ir) ir_expression(expr->operation, vec_input->type,
+                                    vec_input, expr->operands[1]);
+
+      return convert_vec_index_to_cond_assign(ralloc_parent(ir),
+                                              vec_interpolate,
+                                              interpolant->operands[1],
+                                              ir->type);
+   }
+
+   if (expr->operation != ir_binop_vector_extract)
      return ir;

   return convert_vec_index_to_cond_assign(ralloc_parent(ir),
--- a/src/compiler/glsl/meson.build
+++ b/src/compiler/glsl/meson.build
@@ -124,6 +124,7 @@ files_libglsl = files(
  'lower_buffer_access.cpp',
  'lower_buffer_access.h',
  'lower_const_arrays_to_uniforms.cpp',
+  'lower_cs_derived.cpp',
  'lower_discard.cpp',
  'lower_discard_flow.cpp',
  'lower_distance.cpp',
--- a/src/compiler/glsl/opt_dead_builtin_variables.cpp
+++ b/src/compiler/glsl/opt_dead_builtin_variables.cpp
@@ -62,23 +62,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
       * information, so removing these variables from the user shader will
       * cause problems later.
       *
-       * For compute shaders, gl_GlobalInvocationID has some dependencies, so
-       * we avoid removing these dependencies.
-       *
-       * We also avoid removing gl_GlobalInvocationID at this stage because it
-       * might be used by a linked shader. In this case it still needs to be
-       * initialized by the main function.
-       *
-       *    gl_GlobalInvocationID =
-       *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
-       *
-       * Similarly, we initialize gl_LocalInvocationIndex in the main function:
-       *
-       *    gl_LocalInvocationIndex =
-       *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
-       *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
-       *       gl_LocalInvocationID.x;
-       *
       * Matrix uniforms with "Transpose" are not eliminated because there's
       * an optimization pass that can turn references to the regular matrix
       * into references to the transpose matrix.  Eliminating the transpose
@@ -90,11 +73,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
       */
      if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
          || strcmp(var->name, "gl_Vertex") == 0
-          || strcmp(var->name, "gl_WorkGroupID") == 0
-          || strcmp(var->name, "gl_WorkGroupSize") == 0
-          || strcmp(var->name, "gl_LocalInvocationID") == 0
-          || strcmp(var->name, "gl_GlobalInvocationID") == 0
-          || strcmp(var->name, "gl_LocalInvocationIndex") == 0
          || strstr(var->name, "Transpose") != NULL)
         continue;

--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -670,7 +670,7 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
   prog->data->NumUniformStorage = blob_read_uint32(metadata);
   prog->data->NumUniformDataSlots = blob_read_uint32(metadata);

-   uniforms = rzalloc_array(prog, struct gl_uniform_storage,
+   uniforms = rzalloc_array(prog->data, struct gl_uniform_storage,
                            prog->data->NumUniformStorage);
   prog->data->UniformStorage = uniforms;

@@ -1126,7 +1126,7 @@ read_program_resource_list(struct blob_reader *metadata,
   prog->data->NumProgramResourceList = blob_read_uint32(metadata);

   prog->data->ProgramResourceList =
-      ralloc_array(prog, gl_program_resource,
+      ralloc_array(prog->data, gl_program_resource,
                   prog->data->NumProgramResourceList);

   for (unsigned i = 0; i < prog->data->NumProgramResourceList; i++) {
@@ -1448,7 +1448,7 @@ shader_cache_read_program_metadata(struct gl_context *ctx,
      return false;

   struct disk_cache *cache = ctx->Cache;
-   if (!cache || prog->data->cache_fallback || prog->data->skip_cache)
+   if (!cache || prog->data->skip_cache)
      return false;

   /* Include bindings when creating sha1. These bindings change the resulting
--- a/src/compiler/glsl/tests/array_refcount_test.cpp
+++ b/src/compiler/glsl/tests/array_refcount_test.cpp
@@ -628,7 +628,7 @@ TEST_F(array_refcount_test, visit_array_indexing_an_array)

   ir_array_refcount_entry *const entry_c = v.get_variable_entry(var_c);

-   for (unsigned i = 0; i < var_c->type->array_size(); i++) {
+   for (int i = 0; i < var_c->type->array_size(); i++) {
      EXPECT_EQ(true, entry_c->is_linearized_index_referenced(i)) <<
         "array c, i = " << i;
   }
--- a/src/compiler/glsl/tests/uniform_initializer_utils.cpp
+++ b/src/compiler/glsl/tests/uniform_initializer_utils.cpp
@@ -198,6 +198,22 @@ generate_array_data(void *mem_ctx, enum glsl_base_type base_type,
   val = new(mem_ctx) ir_constant(array_type, &values_for_array);
 }

+static uint64_t
+uint64_storage(union gl_constant_value *storage)
+{
+   uint64_t bits; /* filled from sizeof(bits) bytes starting at storage->i */
+   memcpy(&bits, &storage->i, sizeof(bits));
+   return bits;
+}
+
+static double
+double_storage(union gl_constant_value *storage)
+{
+   double val; /* filled from sizeof(double) bytes starting at storage->i */
+   memcpy(&val, &storage->i, sizeof(double));
+   return val; /* returned as double so EXPECT_EQ compares the exact value */
+}
+
 /**
 * Verify that the data stored for the uniform matches the initializer
 *
@@ -246,13 +262,13 @@ verify_data(gl_constant_value *storage, unsigned storage_array_size,
 	    EXPECT_EQ(val->value.b[i] ? boolean_true : 0, storage[i].i);
 	    break;
 	 case GLSL_TYPE_DOUBLE:
-	    EXPECT_EQ(val->value.d[i], *(double *)&storage[i*2].i);
+	    EXPECT_EQ(val->value.d[i], double_storage(&storage[i*2]));
 	    break;
 	 case GLSL_TYPE_UINT64:
-	    EXPECT_EQ(val->value.u64[i], *(uint64_t *)&storage[i*2].i);
+            EXPECT_EQ(val->value.u64[i], uint64_storage(&storage[i*2]));
 	    break;
 	 case GLSL_TYPE_INT64:
-	    EXPECT_EQ(val->value.i64[i], *(int64_t *)&storage[i*2].i);
+	    EXPECT_EQ(val->value.i64[i], uint64_storage(&storage[i*2]));
 	    break;
         case GLSL_TYPE_ATOMIC_UINT:
 	 case GLSL_TYPE_STRUCT:
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -41,9 +41,9 @@
 #include "compiler/shader_info.h"
 #include <stdio.h>

-#ifdef DEBUG
+#ifndef NDEBUG
 #include "util/debug.h"
-#endif /* DEBUG */
+#endif /* NDEBUG */

 #include "nir_opcodes.h"

@@ -1214,7 +1214,6 @@ typedef struct {
    *    - nir_texop_txf_ms
    *    - nir_texop_txs
    *    - nir_texop_lod
-    *    - nir_texop_tg4
    *    - nir_texop_query_levels
    *    - nir_texop_texture_samples
    *    - nir_texop_samples_identical
@@ -2342,7 +2341,7 @@ static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void)
 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; }
 static inline bool should_clone_nir(void) { return false; }
 static inline bool should_print_nir(void) { return false; }
-#endif /* DEBUG */
+#endif /* NDEBUG */

 #define _PASS(nir, do_pass) do {                                     \
   do_pass                                                           \
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -434,9 +434,9 @@ INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
 /* src[] = { buffer_index, offset }. No const_index */
 LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base, component } */
-LOAD(output, 1, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { vertex, offset }. const_index[] = { base, component } */
-LOAD(per_vertex_output, 2, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(per_vertex_output, 2, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base } */
 LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 /* src[] = { offset }. const_index[] = { base, range } */
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -95,9 +95,15 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
   if (src == NULL) {
      /* This is a load instruction */
      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
+         nir_intrinsic_instr_create(b->shader, orig_instr->intrinsic);
      load->num_components = orig_instr->num_components;
      load->variables[0] = nir_deref_var_clone(deref, load);
+
+      /* Copy over any sources.  This is needed for interp_var_at */
+      for (unsigned i = 0;
+           i < nir_intrinsic_infos[orig_instr->intrinsic].num_srcs; i++)
+         nir_src_copy(&load->src[i], &orig_instr->src[i], load);
+
      unsigned bit_size = orig_instr->dest.ssa.bit_size;
      nir_ssa_dest_init(&load->instr, &load->dest,
                        load->num_components, bit_size, NULL);
@@ -142,6 +148,9 @@ lower_indirect_block(nir_block *block, nir_builder *b,

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (intrin->intrinsic != nir_intrinsic_load_var &&
+          intrin->intrinsic != nir_intrinsic_interp_var_at_centroid &&
+          intrin->intrinsic != nir_intrinsic_interp_var_at_sample &&
+          intrin->intrinsic != nir_intrinsic_interp_var_at_offset &&
          intrin->intrinsic != nir_intrinsic_store_var)
         continue;

@@ -158,7 +167,7 @@ lower_indirect_block(nir_block *block, nir_builder *b,

      b->cursor = nir_before_instr(&intrin->instr);

-      if (intrin->intrinsic == nir_intrinsic_load_var) {
+      if (intrin->intrinsic != nir_intrinsic_store_var) {
         nir_ssa_def *result;
         emit_load_store(b, intrin, intrin->variables[0],
                         &intrin->variables[0]->deref, &result, NULL);
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -464,7 +464,7 @@ lower_copies_to_load_store(struct deref_node *node,

         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
         assert(arg_entry);
-         _mesa_set_remove(node->copies, arg_entry);
+         _mesa_set_remove(arg_node->copies, arg_entry);
      }

      nir_instr_remove(&copy->instr);
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -230,6 +230,7 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
         continue; /* The loop */
      }

+      bool vec_had_ssa_dest = vec->dest.dest.is_ssa;
      if (vec->dest.dest.is_ssa) {
         /* Since we insert multiple MOVs, we have a register destination. */
         nir_register *reg = nir_local_reg_create(impl);
@@ -263,7 +264,11 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
         if (!(vec->dest.write_mask & (1 << i)))
            continue;

-         if (!(finished_write_mask & (1 << i)))
+         /* Coalescing moves the register writes from the vec up to the ALU
+          * instruction in the source.  We can only do this if the original
+          * vecN had an SSA destination.
+          */
+         if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
            finished_write_mask |= try_coalesce(vec, i);

         if (!(finished_write_mask & (1 << i)))
--- a/src/compiler/nir/nir_metadata.c
+++ b/src/compiler/nir/nir_metadata.c
@@ -59,7 +59,7 @@ nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
   impl->valid_metadata &= preserved;
 }

-#ifdef DEBUG
+#ifndef NDEBUG
 /**
 * Make sure passes properly invalidate metadata (part 1).
 *
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -397,8 +397,8 @@ binop("umul_high", tuint32, commutative,
      "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")

 binop("fdiv", tfloat, "", "src0 / src1")
-binop("idiv", tint, "", "src0 / src1")
-binop("udiv", tuint, "", "src0 / src1")
+binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
+binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")

 # returns a boolean representing the carry resulting from the addition of
 # the two unsigned arguments.
@@ -717,12 +717,12 @@ opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
 unsigned base = src0, insert = src1;
 int offset = src2, bits = src3;
 if (bits == 0) {
-   dst = 0;
+   dst = base;
 } else if (offset < 0 || bits < 0 || bits + offset > 32) {
   dst = 0;
 } else {
   unsigned mask = ((1ull << bits) - 1) << offset;
-   dst = (base & ~mask) | ((insert << bits) & mask);
+   dst = (base & ~mask) | ((insert << offset) & mask);
 }
 """)

--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -28,6 +28,26 @@
 * \file nir_opt_intrinsics.c
 */

+static nir_ssa_def *
+high_subgroup_mask(nir_builder *b,
+                   nir_ssa_def *count,
+                   uint64_t base_mask)
+{
+   /* group_mask could probably be calculated more efficiently but we want to
+    * be sure not to shift by 64 if the subgroup size is 64 because the GLSL
+    * shift operator is undefined in that case. In any case if we were worried
+    * about efficiency this should probably be done further down because the
+    * subgroup size is likely to be known at compile time.
+    */
+   nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
+   nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
+   nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
+   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift); /* ~0 >> (64 - size) */
+   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count); /* base_mask << count */
+
+   return nir_iand(b, higher_bits, group_mask); /* drop bits outside group_mask */
+}
+
 static bool
 opt_intrinsics_impl(nir_function_impl *impl)
 {
@@ -95,10 +115,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
               replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
               break;
            case nir_intrinsic_load_subgroup_ge_mask:
-               replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
+               replacement = high_subgroup_mask(&b, count, ~0ull);
               break;
            case nir_intrinsic_load_subgroup_gt_mask:
-               replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
+               replacement = high_subgroup_mask(&b, count, ~1ull);
               break;
            case nir_intrinsic_load_subgroup_le_mask:
               replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -39,10 +39,10 @@
 #define LOOP_UNROLL_LIMIT 96

 /* Prepare this loop for unrolling by first converting to lcssa and then
- * converting the phis from the loops first block and the block that follows
- * the loop into regs.  Partially converting out of SSA allows us to unroll
- * the loop without having to keep track of and update phis along the way
- * which gets tricky and doesn't add much value over conveting to regs.
+ * converting the phis from the top level of the loop body to regs.
+ * Partially converting out of SSA allows us to unroll the loop without having
+ * to keep track of and update phis along the way which gets tricky and
+ * doesn't add much value over converting to regs.
 *
 * The loop may have a continue instruction at the end of the loop which does
 * nothing.  Once we're out of SSA, we can safely delete it so we don't have
@@ -53,13 +53,20 @@ loop_prepare_for_unroll(nir_loop *loop)
 {
   nir_convert_loop_to_lcssa(loop);

-   nir_lower_phis_to_regs_block(nir_loop_first_block(loop));
+   /* Lower phis at the top level of the loop body */
+   foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) {
+      if (nir_cf_node_block == node->type) {
+         nir_lower_phis_to_regs_block(nir_cf_node_as_block(node));
+      }
+   }

+   /* Lower phis after the loop */
   nir_block *block_after_loop =
      nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));

   nir_lower_phis_to_regs_block(block_after_loop);

+   /* Remove continue if it's the last instruction in the loop */
   nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop));
   if (last_instr && last_instr->type == nir_instr_type_jump) {
      assert(nir_instr_as_jump(last_instr)->type == nir_jump_continue);
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -457,7 +457,7 @@ print_var_decl(nir_variable *var, print_state *state)
      switch (var->data.mode) {
      case nir_var_shader_in:
      case nir_var_shader_out:
-         if (num_components != 4 && num_components != 0) {
+         if (num_components < 4 && num_components != 0) {
            const char *xyzw = "xyzw";
            for (int i = 0; i < num_components; i++)
               components_local[i + 1] = xyzw[i + var->data.location_frac];
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -35,7 +35,7 @@
 /* Since this file is just a pile of asserts, don't bother compiling it if
 * we're not building a debug build.
 */
-#ifdef DEBUG
+#ifndef NDEBUG

 /*
 * Per-register validation state.
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1490,6 +1490,8 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
      struct vtn_value *val =
         vtn_push_value(b, w[2], vtn_value_type_sampled_image);
      val->sampled_image = ralloc(b, struct vtn_sampled_image);
+      val->sampled_image->type =
+         vtn_value(b, w[1], vtn_value_type_type)->type;
      val->sampled_image->image =
         vtn_value(b, w[3], vtn_value_type_pointer)->pointer;
      val->sampled_image->sampler =
@@ -1516,19 +1518,14 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
      sampled = *sampled_val->sampled_image;
   } else {
      assert(sampled_val->value_type == vtn_value_type_pointer);
+      sampled.type = sampled_val->pointer->type;
      sampled.image = NULL;
      sampled.sampler = sampled_val->pointer;
   }

-   const struct glsl_type *image_type;
-   if (sampled.image) {
-      image_type = sampled.image->var->var->interface_type;
-   } else {
-      image_type = sampled.sampler->var->var->interface_type;
-   }
+   const struct glsl_type *image_type = sampled.type->type;
   const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
   const bool is_array = glsl_sampler_type_is_array(image_type);
-   const bool is_shadow = glsl_sampler_type_is_shadow(image_type);

   /* Figure out the base texture operation */
   nir_texop texop;
@@ -1652,6 +1649,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
      break;
   }

+   bool is_shadow = false;
   unsigned gather_component = 0;
   switch (opcode) {
   case SpvOpImageSampleDrefImplicitLod:
@@ -1660,6 +1658,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   case SpvOpImageSampleProjDrefExplicitLod:
   case SpvOpImageDrefGather:
      /* These all have an explicit depth value as their next source */
+      is_shadow = true;
      (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparator);
      break;

@@ -1757,6 +1756,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   case nir_texop_txb:
   case nir_texop_txl:
   case nir_texop_txd:
+   case nir_texop_tg4:
      /* These operations require a sampler */
      instr->sampler = nir_deref_var_clone(sampler, instr);
      break;
@@ -1764,7 +1764,6 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
   case nir_texop_txf_ms:
   case nir_texop_txs:
   case nir_texop_lod:
-   case nir_texop_tg4:
   case nir_texop_query_levels:
   case nir_texop_texture_samples:
   case nir_texop_samples_identical:
@@ -2802,7 +2801,8 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,

   case SpvOpMemoryModel:
      assert(w[1] == SpvAddressingModelLogical);
-      assert(w[2] == SpvMemoryModelGLSL450);
+      assert(w[2] == SpvMemoryModelSimple ||
+             w[2] == SpvMemoryModelGLSL450);
      break;

   case SpvOpEntryPoint: {
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -411,6 +411,7 @@ struct vtn_image_pointer {
 };

 struct vtn_sampled_image {
+   struct vtn_type *type;
   struct vtn_pointer *image; /* Image or array of images */
   struct vtn_pointer *sampler; /* Sampler */
 };
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -518,35 +518,37 @@ vtn_pointer_to_offset(struct vtn_builder *b, struct vtn_pointer *ptr,
   *index_out = get_vulkan_resource_index(b, ptr, &type, &idx);

   nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
-   for (; idx < ptr->chain->length; idx++) {
-      enum glsl_base_type base_type = glsl_get_base_type(type->type);
-      switch (base_type) {
-      case GLSL_TYPE_UINT:
-      case GLSL_TYPE_INT:
-      case GLSL_TYPE_UINT64:
-      case GLSL_TYPE_INT64:
-      case GLSL_TYPE_FLOAT:
-      case GLSL_TYPE_DOUBLE:
-      case GLSL_TYPE_BOOL:
-      case GLSL_TYPE_ARRAY:
-         offset = nir_iadd(&b->nb, offset,
-                           vtn_access_link_as_ssa(b, ptr->chain->link[idx],
-                                                  type->stride));
+   if (ptr->chain) {
+      for (; idx < ptr->chain->length; idx++) {
+         enum glsl_base_type base_type = glsl_get_base_type(type->type);
+         switch (base_type) {
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_INT:
+         case GLSL_TYPE_UINT64:
+         case GLSL_TYPE_INT64:
+         case GLSL_TYPE_FLOAT:
+         case GLSL_TYPE_DOUBLE:
+         case GLSL_TYPE_BOOL:
+         case GLSL_TYPE_ARRAY:
+            offset = nir_iadd(&b->nb, offset,
+                              vtn_access_link_as_ssa(b, ptr->chain->link[idx],
+                                                   type->stride));

-         type = type->array_element;
-         break;
+            type = type->array_element;
+            break;

-      case GLSL_TYPE_STRUCT: {
-         assert(ptr->chain->link[idx].mode == vtn_access_mode_literal);
-         unsigned member = ptr->chain->link[idx].id;
-         offset = nir_iadd(&b->nb, offset,
-                           nir_imm_int(&b->nb, type->offsets[member]));
-         type = type->members[member];
-         break;
-      }
+         case GLSL_TYPE_STRUCT: {
+            assert(ptr->chain->link[idx].mode == vtn_access_mode_literal);
+            unsigned member = ptr->chain->link[idx].id;
+            offset = nir_iadd(&b->nb, offset,
+                              nir_imm_int(&b->nb, type->offsets[member]));
+            type = type->members[member];
+            break;
+         }

-      default:
-         unreachable("Invalid type for deref");
+         default:
+            unreachable("Invalid type for deref");
+         }
      }
   }

@@ -1805,6 +1807,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
         struct vtn_value *val =
            vtn_push_value(b, w[2], vtn_value_type_sampled_image);
         val->sampled_image = ralloc(b, struct vtn_sampled_image);
+         val->sampled_image->type = base_val->sampled_image->type;
         val->sampled_image->image =
            vtn_pointer_dereference(b, base_val->sampled_image->image, chain);
         val->sampled_image->sampler = base_val->sampled_image->sampler;
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -46,7 +46,6 @@ libEGL_common_la_SOURCES = \
 	$(LIBEGL_C_FILES)

 libEGL_common_la_LIBADD = \
-	$(top_builddir)/src/mapi/shared-glapi/libglapi.la \
 	$(top_builddir)/src/util/libmesautil.la \
 	$(EGL_LIB_DEPS)

@@ -165,7 +164,9 @@ libEGL_mesa_la_SOURCES = \
 	main/egldispatchstubs.c \
 	g_egldispatchstubs.c \
 	g_egldispatchstubs.h
-libEGL_mesa_la_LIBADD = libEGL_common.la
+libEGL_mesa_la_LIBADD = \
+	libEGL_common.la \
+	$(top_builddir)/src/mapi/shared-glapi/libglapi.la
 libEGL_mesa_la_LDFLAGS = \
 	-no-undefined \
 	-version-number 0 \
@@ -177,7 +178,9 @@ else # USE_LIBGLVND

 lib_LTLIBRARIES = libEGL.la
 libEGL_la_SOURCES =
-libEGL_la_LIBADD = libEGL_common.la
+libEGL_la_LIBADD = \
+	libEGL_common.la \
+	$(top_builddir)/src/mapi/shared-glapi/libglapi.la
 libEGL_la_LDFLAGS = \
 	-no-undefined \
 	-version-number 1:0 \
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -904,10 +904,6 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
      return EGL_TRUE;
   }

-   /* not until swrast_dri is supported */
-   if (disp->Options.UseFallback)
-      return EGL_FALSE;
-
   switch (disp->Platform) {
 #ifdef HAVE_SURFACELESS_PLATFORM
   case _EGL_PLATFORM_SURFACELESS:
@@ -977,7 +973,7 @@ dri2_display_destroy(_EGLDisplay *disp)
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);

   if (dri2_dpy->own_dri_screen) {
-      if (dri2_dpy->vtbl->close_screen_notify)
+      if (dri2_dpy->vtbl && dri2_dpy->vtbl->close_screen_notify)
         dri2_dpy->vtbl->close_screen_notify(disp);
      dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
   }
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -44,7 +44,7 @@

 #ifdef HAVE_WAYLAND_PLATFORM
 #include <wayland-client.h>
-#include "wayland-egl-backend.h"
+#include "wayland/wayland-egl/wayland-egl-backend.h"
 /* forward declarations of protocol elements */
 struct zwp_linux_dmabuf_v1;
 #endif
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -1136,12 +1136,16 @@ static const __DRIextension *droid_image_loader_extensions[] = {
 };

 EGLBoolean
-dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
+dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)
 {
   struct dri2_egl_display *dri2_dpy;
   const char *err;
   int ret;

+   /* Not supported yet */
+   if (disp->Options.UseFallback)
+      return EGL_FALSE;
+
   loader_set_logger(_eglLog);

   dri2_dpy = calloc(1, sizeof(*dri2_dpy));
@@ -1156,7 +1160,7 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
      goto cleanup;
   }

-   dpy->DriverData = (void *) dri2_dpy;
+   disp->DriverData = (void *) dri2_dpy;

   dri2_dpy->fd = droid_open_device(dri2_dpy);
   if (dri2_dpy->fd < 0) {
@@ -1176,41 +1180,41 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
    * the __DRI_DRI2_LOADER extension */
   if (!dri2_dpy->is_render_node) {
      dri2_dpy->loader_extensions = droid_dri2_loader_extensions;
-      if (!dri2_load_driver(dpy)) {
+      if (!dri2_load_driver(disp)) {
         err = "DRI2: failed to load driver";
         goto cleanup;
      }
   } else {
      dri2_dpy->loader_extensions = droid_image_loader_extensions;
-      if (!dri2_load_driver_dri3(dpy)) {
+      if (!dri2_load_driver_dri3(disp)) {
         err = "DRI3: failed to load driver";
         goto cleanup;
      }
   }

-   if (!dri2_create_screen(dpy)) {
+   if (!dri2_create_screen(disp)) {
      err = "DRI2: failed to create screen";
      goto cleanup;
   }

-   if (!dri2_setup_extensions(dpy)) {
+   if (!dri2_setup_extensions(disp)) {
      err = "DRI2: failed to setup extensions";
      goto cleanup;
   }

-   dri2_setup_screen(dpy);
+   dri2_setup_screen(disp);

-   if (!droid_add_configs_for_visuals(drv, dpy)) {
+   if (!droid_add_configs_for_visuals(drv, disp)) {
      err = "DRI2: failed to add configs";
      goto cleanup;
   }

-   dpy->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
-   dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
-   dpy->Extensions.ANDROID_recordable = EGL_TRUE;
-   dpy->Extensions.EXT_buffer_age = EGL_TRUE;
+   disp->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
+   disp->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
+   disp->Extensions.ANDROID_recordable = EGL_TRUE;
+   disp->Extensions.EXT_buffer_age = EGL_TRUE;
 #if ANDROID_API_LEVEL >= 23
-   dpy->Extensions.KHR_partial_update = EGL_TRUE;
+   disp->Extensions.KHR_partial_update = EGL_TRUE;
 #endif

   /* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1221,6 +1225,6 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
   return EGL_TRUE;

 cleanup:
-   dri2_display_destroy(dpy);
+   dri2_display_destroy(disp);
   return _eglError(EGL_NOT_INITIALIZED, err);
 }
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -652,6 +652,10 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
   struct gbm_device *gbm;
   const char *err;

+   /* Not supported yet */
+   if (disp->Options.UseFallback)
+      return EGL_FALSE;
+
   loader_set_logger(_eglLog);

   dri2_dpy = calloc(1, sizeof *dri2_dpy);
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -671,6 +671,35 @@ static const struct wl_callback_listener throttle_listener = {
   .done = wayland_throttle_callback
 };

+static EGLBoolean
+get_fourcc(struct dri2_egl_display *dri2_dpy,
+           __DRIimage *image, int *fourcc)
+{
+   EGLBoolean query;
+   int dri_format;
+
+   query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FOURCC,
+                                       fourcc);
+   if (query) /* the FOURCC query succeeded; *fourcc is already filled in */
+      return true;
+
+   query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FORMAT,
+                                       &dri_format); /* fall back to the FORMAT attribute */
+   if (!query) /* neither attribute could be queried */
+      return false;
+
+   switch (dri_format) { /* translate the two handled DRI formats to fourcc codes */
+   case __DRI_IMAGE_FORMAT_ARGB8888:
+      *fourcc = __DRI_IMAGE_FOURCC_ARGB8888;
+      return true;
+   case __DRI_IMAGE_FORMAT_XRGB8888:
+      *fourcc = __DRI_IMAGE_FOURCC_XRGB8888;
+      return true;
+   default: /* any other format has no mapping here; *fourcc is not written */
+      return false;
+   }
+}
+
 static struct wl_buffer *
 create_wl_buffer(struct dri2_egl_display *dri2_dpy,
                 struct dri2_egl_surface *dri2_surf,
@@ -684,8 +713,7 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
   query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_WIDTH, &width);
   query &= dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_HEIGHT,
                                        &height);
-   query &= dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FOURCC,
-                                        &fourcc);
+   query &= get_fourcc(dri2_dpy, image, &fourcc);
   if (!query)
      return NULL;

@@ -1199,8 +1227,8 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp)
      int has_format;
      unsigned int rgba_masks[4];
   } visuals[] = {
-      { "XRGB8888", HAS_XRGB8888, { 0xff0000, 0xff00, 0x00ff, 0xff000000 } },
-      { "ARGB8888", HAS_ARGB8888, { 0xff0000, 0xff00, 0x00ff, 0 } },
+      { "XRGB8888", HAS_XRGB8888, { 0xff0000, 0xff00, 0x00ff, 0 } },
+      { "ARGB8888", HAS_ARGB8888, { 0xff0000, 0xff00, 0x00ff, 0xff000000 } },
      { "RGB565",   HAS_RGB565,   { 0x00f800, 0x07e0, 0x001f, 0 } },
   };
   unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
@@ -1692,7 +1720,7 @@ dri2_wl_swrast_commit_backbuffer(struct dri2_egl_surface *dri2_surf)
    * handle the commit and send a release event before checking for a free
    * buffer */
   if (dri2_surf->throttle_callback == NULL) {
-      dri2_surf->throttle_callback = wl_display_sync(dri2_dpy->wl_dpy_wrapper);
+      dri2_surf->throttle_callback = wl_display_sync(dri2_surf->wl_dpy_wrapper);
      wl_callback_add_listener(dri2_surf->throttle_callback,
                               &throttle_listener, dri2_surf);
   }
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -75,6 +75,17 @@ egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
   return dri2_ctx->dri_context;
 }

+static __DRIscreen *
+egl_dri3_get_dri_screen(void)
+{
+   _EGLContext *ctx = _eglGetCurrentContext();
+   struct dri2_egl_context *dri2_ctx;
+   if (!ctx) /* _eglGetCurrentContext() returned NULL: no screen to report */
+      return NULL;
+   dri2_ctx = dri2_egl_context(ctx);
+   return dri2_egl_display(dri2_ctx->base.Resource.Display)->dri_screen; /* screen of the context's display */
+}
+
 static void
 egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
 {
@@ -88,6 +99,7 @@ static const struct loader_dri3_vtable egl_dri3_vtable = {
   .set_drawable_size = egl_dri3_set_drawable_size,
   .in_current_context = egl_dri3_in_current_context,
   .get_dri_context = egl_dri3_get_dri_context,
+   .get_dri_screen = egl_dri3_get_dri_screen,
   .flush_drawable = egl_dri3_flush_drawable,
   .show_fps = NULL,
 };
--- a/src/egl/meson.build
+++ b/src/egl/meson.build
@@ -21,7 +21,9 @@
 c_args_for_egl = []
 link_for_egl = []
 deps_for_egl = []
-incs_for_egl = []
+incs_for_egl = [
+  inc_include, inc_src, inc_loader, inc_gbm, include_directories('main'),
+]
 files_egl = files(
  'main/eglapi.c',
  'main/eglapi.h',
@@ -159,10 +161,7 @@ libegl = shared_library(
    '-D_EGL_BUILT_IN_DRIVER_DRI2',
    '-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
  ],
-  include_directories : [
-    incs_for_egl, inc_include, inc_src, inc_loader, inc_gbm,
-    include_directories('main'),
-  ],
+  include_directories : incs_for_egl,
  link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util],
  link_args : [ld_args_bsymbolic, ld_args_gc_sections],
  dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread],
--- a/src/egl/wayland/wayland-egl/Makefile.am
+++ b/src/egl/wayland/wayland-egl/Makefile.am
@@ -3,7 +3,7 @@ pkgconfig_DATA = wayland-egl.pc

 AM_CFLAGS = $(DEFINES) \
 	    $(VISIBILITY_CFLAGS) \
-	    $(WAYLAND_SERVER_CFLAGS)
+	    $(WAYLAND_CLIENT_CFLAGS)

 lib_LTLIBRARIES = libwayland-egl.la
 noinst_HEADERS = wayland-egl-backend.h
--- a/Show More
+++ b/Show More