docs: add release notes for 17.2.2

Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
Update version to 17.2.2
2017-10-02 17:26:10 +02:00 · 2017-10-02 17:15:13 +02:00 · 2017-10-02 17:12:17 +02:00 · 2017-10-02 17:12:17 +02:00 · 2017-10-02 17:12:17 +02:00 · 2017-09-28 13:54:58 +00:00
83 changed files with 1947 additions and 1035 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -40,6 +40,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS=""
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--disable-libunwind"
      addons:
        apt:
          packages:
@@ -66,6 +67,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS="swr"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          sources:
@@ -81,6 +83,7 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Gallium Drivers Other"
        - BUILD=make
@@ -93,6 +96,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          sources:
@@ -108,6 +112,7 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        # NOTE: Analogous to SWR above, building Clover is quite slow.
        - LABEL="make Gallium ST Clover"
@@ -125,6 +130,7 @@ matrix:
        # Regardless - we're doing a quick build test here.
        - GALLIUM_DRIVERS="i915"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          sources:
@@ -144,11 +150,14 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Gallium ST Other"
        - BUILD=make
        - MAKEFLAGS="-j4"
        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.3
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
        - DRI_DRIVERS=""
        - GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx --enable-gallium-osmesa"
@@ -157,9 +166,12 @@ matrix:
        # Regardless - we're doing a quick build test here.
        - GALLIUM_DRIVERS="i915,swrast"
        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
        apt:
          packages:
+            # We actually want to test against llvm-3.3
+            - llvm-3.3-dev
            # Nine requires gcc 4.6... which is the one we have right ?
            - libxvmc-dev
            # Build locally, for now.
@@ -174,6 +186,7 @@ matrix:
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Vulkan"
        - BUILD=make
@@ -186,6 +199,7 @@ matrix:
        - GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
        - GALLIUM_DRIVERS=""
        - VULKAN_DRIVERS="intel,radeon"
+        - LIBUNWIND_FLAGS="--disable-libunwind"
      addons:
        apt:
          sources:
@@ -367,6 +381,7 @@ script:
      export CC="$CC -isystem`pwd`";

      ./autogen.sh --enable-debug
+        $LIBUNWIND_FLAGS
        $DRI_LOADERS
        --with-dri-drivers=$DRI_DRIVERS
        $GALLIUM_ST
--- a/Makefile.am
+++ b/Makefile.am
@@ -41,6 +41,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-xa \
 	--enable-xvmc \
 	--enable-llvm-shared-libs \
+	--enable-libunwind \
 	--with-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
 	--with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
--- a/2
+++ b/2
@@ -1 +1 @@
-17.2.0-rc6
+17.2.2
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -2,3 +2,31 @@
 #         causing regressions have been reverted.
 365d34540f331df57780dddf8da87235be0a6bcb mesa: correctly calculate the storage offset for i915
 de0e62e1065e2d9172acf3ab7c70bba0160125c8 st/mesa: correctly calculate the storage offset
+
+# stable: Add loader::getCapability patches. It's rather invasive infra
+#         not suitable as a bugfix.
+1bf703e4ea5c4f742bc7ba55d01e5afc3f4e11f9 dri_interface,egl,gallium: only expose RGBA visuals on Android
+be5773fa8dfe9255d9abaf5c7d5bbbd2d922da08 Android: fix compile error for DRI2 loader getCapability
+31a6750988d7dd431f72ff1ff11bfca83bde5d8c st/dri: NULL check before deref DRI loader .getCapability
+
+# stable: The commit addresses code that did not land in the stable branch
+31bb8517a194af733deefe2d821537d994d39365 radv/gfx9: fix tile swizzle handling for gfx9
+
+# stable: Commit is not applicable when 4fab67a4415 is missing.
+d496780fb2c7f2cf0e32b6a79dc528e5156dfcb3 intel/eu/validate: Look up types on demand in execution_type()
+
+# fixes: Depend on preceding commit which adds new public GBM API
+3a5e3aa5a53cff55a5e31766d713a41ffa5a93d7 egl/drm: Fix misused x and y offsets in swrast_put_image2()
+fe2a6281b3b299998fe7399e7dbcc2077d773824 egl/drm: Fix misused x and y offsets in swrast_get_image()
+
+# fixes: This commit addressed an earlier commit c7e9ebb3ab8 which did not
+#        land in branch
+45c5c444518b7e83d9accd9f44702fa49282a3b8 radeonsi/gfx9: proper workaround for LS/HS VGPR initialization bug
+
+# fixes: This commit addressed earlier commits 61ad2f13 and 6dcc54b4 which did
+#        not land in branch
+979978ee06867a531b8d56cee252f5c83920a339 radv: Check for GFX9 for 1D arrays in image_size intrinsic.
+
+# fixes: This commit addressed earlier commits dcf46e99 and 60878dd0 which did
+#        not land in branch
+8e9e339c530c7b82b5a29d4b3183e8f5a01eae28 radv: copy the number of viewports/scissors at pipeline bind time
--- a/configure.ac
+++ b/configure.ac
@@ -410,8 +410,21 @@ int main() {
 }]])], GCC_ATOMIC_BUILTINS_SUPPORTED=1)
 if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
    DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
+    dnl On some platforms, new-style atomics need a helper library
+    AC_MSG_CHECKING(whether -latomic is needed)
+    AC_LINK_IFELSE([AC_LANG_SOURCE([[
+    #include <stdint.h>
+    uint64_t v;
+    int main() {
+        return (int)__atomic_load_n(&v, __ATOMIC_ACQUIRE);
+    }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes)
+    AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC)
+    if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then
+        LIBATOMIC_LIBS="-latomic"
+    fi
 fi
 AM_CONDITIONAL([GCC_ATOMIC_BUILTINS_SUPPORTED], [test x$GCC_ATOMIC_BUILTINS_SUPPORTED = x1])
+AC_SUBST([LIBATOMIC_LIBS])

 dnl Check if host supports 64-bit atomics
 dnl note that lack of support usually results in link (not compile) error
@@ -826,6 +839,27 @@ AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
 AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])

+AC_MSG_CHECKING([whether strtod has locale support])
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+    #define _GNU_SOURCE
+    #include <stdlib.h>
+    #include <locale.h>
+    #ifdef HAVE_XLOCALE_H
+    #include <xlocale.h>
+    #endif
+    int main() {
+       locale_t loc = newlocale(LC_CTYPE_MASK, "C", NULL);
+       const char *s = "1.0";
+       char *end;
+       double d = strtod_l(s, &end, loc); /* endptr parameter is char ** */
+       float f = strtof_l(s, &end, loc);
+       freelocale(loc);
+       return 0;
+    }]])],
+  [DEFINES="$DEFINES -DHAVE_STRTOD_L"];
+   AC_MSG_RESULT([yes]),
+   AC_MSG_RESULT([no]))
+
 dnl Check to see if dlopen is in default libraries (like Solaris, which
 dnl has it in libc), or if libdl is needed to get it.
 AC_CHECK_FUNC([dlopen], [DEFINES="$DEFINES -DHAVE_DLOPEN"],
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -130,27 +130,6 @@ mesa/demos repository.</p>
 runtime</p>

 <dl>
-<dt><code>EGL_DRIVERS_PATH</code></dt>
-<dd>
-
-<p>By default, the main library will look for drivers in the directory where
-the drivers are installed to.  This variable specifies a list of
-colon-separated directories where the main library will look for drivers, in
-addition to the default directory.  This variable is ignored for setuid/setgid
-binaries.</p>
-
-<p>This variable is usually set to test an uninstalled build.  For example, one
-may set</p>
-
-<pre>
-  $ export LD_LIBRARY_PATH=$mesa/lib
-  $ export EGL_DRIVERS_PATH=$mesa/lib/egl
-</pre>
-
-<p>to test a build without installation</p>
-
-</dd>
-
 <dt><code>EGL_DRIVER</code></dt>
 <dd>

--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 17.2.0 Release Notes / TBD</h1>
+<h1>Mesa 17.2.0 Release Notes / September 4, 2017</h1>

 <p>
 Mesa 17.2.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+9484ad96b4bb6cda5bbf1aef52dfa35183dc21aa6258a2991c245996c2fdaf85  mesa-17.2.0.tar.gz
+3123448f770eae58bc73e15480e78909defb892f10ab777e9116c9b218094943  mesa-17.2.0.tar.xz
 </pre>


@@ -56,9 +57,156 @@ Note: some of the new features are only available with certain drivers.
 <h2>Bug fixes</h2>

 <ul>
-TBD
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68365">Bug 68365</a> - [SNB Bisected]Piglit spec_ARB_framebuffer_object_fbo-blit-stretch  fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77240">Bug 77240</a> - khrplatform.h not installed if EGL is disabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95530">Bug 95530</a> - Stellaris - colored overlay of sectors doesn't render on i965</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96449">Bug 96449</a> - Dying Light reports OpenGL version 3.0 with mesa-git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96958">Bug 96958</a> - [SKL] Improper rendering in Europa Universalis IV</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97957">Bug 97957</a> - Awful screen tearing in a separate X server with DRI3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98238">Bug 98238</a> - Witcher 2: objects are black when changing lod on Radeon Pitcairn</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99467">Bug 99467</a> - [radv] DOOM 2016 + wine. Green screen everywhere (but can be started)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100070">Bug 100070</a> - Rocket League: grass gets rendered incorrectly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100242">Bug 100242</a> - radeon buffer allocation failure during startup of Factorio</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100785">Bug 100785</a> - [regression, bisected] arb_gpu_shader5 piglit fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100871">Bug 100871</a> - gles cts hangs mesa indefinitely</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100877">Bug 100877</a> - vulkan/tests/block_pool_no_free regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100937">Bug 100937</a> - Mesa fails to build with GCC 4.8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100945">Bug 100945</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100988">Bug 100988</a> - glXGetCurrentDisplay() no longer works for FakeGLX contexts?</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101071">Bug 101071</a> - compiling glsl fails with undefined reference to `pthread_create'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101088">Bug 101088</a> - `gallium: remove pipe_index_buffer and set_index_buffer` causes glitches and crash in gallium nine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101110">Bug 101110</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101189">Bug 101189</a> - Latest git fails to compile with radeon</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101252">Bug 101252</a> - eglGetDisplay() is not thread safe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101254">Bug 101254</a> - VDPAU videos don't start playing with r600 gallium driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101283">Bug 101283</a> - skylake: page fault accessing address 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101284">Bug 101284</a> - [G45] ES2-CTS.functional.texture.specification.basic_copytexsubimage2d.cube_rgba</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101294">Bug 101294</a> - radeonsi minecraft forge splash freeze since 17.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101306">Bug 101306</a> - [BXT] gles asserts in cts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101326">Bug 101326</a> - gallium/wgl: Allow context creation without prior SetPixelFormat()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101334">Bug 101334</a> - AMD SI cards: Some vulkan apps freeze the system</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101336">Bug 101336</a> - glcpp-test.sh regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101340">Bug 101340</a> - i915_surface.c:108:4: error: too few arguments to function ‘util_blitter_default_src_texture’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101360">Bug 101360</a> - Assertion failure comparing result of ballotARB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101401">Bug 101401</a> - [REGRESSION][BISECTED] GDM fails to start after 8ec4975cd83365c791a1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101418">Bug 101418</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101451">Bug 101451</a> - [G33] ES2-CTS.functional.clipping.polygon regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101464">Bug 101464</a> - PrimitiveRestartNV inside a render list causes a crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101471">Bug 101471</a> - Mesa fails to build: unknown typename bool</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101535">Bug 101535</a> - [bisected] [Skylake] Kwin won't start and glxgears coredumps</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101538">Bug 101538</a> - From &quot;Use isl for hiz layouts&quot; commit onwards, everything crashes with Mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101539">Bug 101539</a> - [Regresion] [IVB] Segment fault in recent commit in intel_miptree_level_has_hiz under Ivy bridge</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101558">Bug 101558</a> - [regression][bisected] MPV playing video via opengl &quot;randomly&quot; results in only part of the window / screen being rendered with Mesa GIT.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101596">Bug 101596</a> - Blender renders black UI elements</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101607">Bug 101607</a> - Regression in anisotropic filtering from &quot;i965: Convert fs sampler state to use genxml&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101657">Bug 101657</a> - strtod.c:32:10: fatal error: xlocale.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101666">Bug 101666</a> - bitfieldExtract is marked as a built-in function on OpenGL ES 3.0, but was added in OpenGL ES 3.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101683">Bug 101683</a> - Some games hang while loading when compositing is shut off or absent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101703">Bug 101703</a> - No stencil buffer allocated when requested by GLUT</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101704">Bug 101704</a> - [regression][bisected] glReadPixels() from pbuffer failing in Android CTS camera tests</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101766">Bug 101766</a> - Assertion `!&quot;invalid type&quot;' failed when constant expression involves literal of different type</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101774">Bug 101774</a> - gen_clflush.h:37:7: error: implicit declaration of function ‘__builtin_ia32_clflush’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101775">Bug 101775</a> - Xorg segfault since 147d7fb &quot;st/mesa: add a winsys buffers list in st_context&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101829">Bug 101829</a> - read-after-free in st_framebuffer_validate</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101831">Bug 101831</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101851">Bug 101851</a> - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101867">Bug 101867</a> - Launch options window renders black in Feral Games in current Mesa trunk</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101876">Bug 101876</a> - SIGSEGV when launching Steam</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101910">Bug 101910</a> - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101925">Bug 101925</a> - playstore/webview crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101961">Bug 101961</a> - Serious Sam Fusion hangs system completely</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101982">Bug 101982</a> - Weston crashes when running an OpenGL program on i965</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101983">Bug 101983</a> - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102024">Bug 102024</a> - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102148">Bug 102148</a> - Crash when running qopenglwidget example on mesa llvmpipe win32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102241">Bug 102241</a> - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102308">Bug 102308</a> - segfault in glCompressedTextureSubImage3D</li>
+
 </ul>

+
 <h2>Changes</h2>

 <ul>
--- a/docs/relnotes/17.2.1.html
+++ b/docs/relnotes/17.2.1.html
@@ -0,0 +1,200 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.1 Release Notes / September 17, 2017</h1>
+
+<p>
+Mesa 17.2.1 is a bug fix release which fixes bugs found since the 17.2.0 release.
+</p>
+<p>
+Mesa 17.2.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c902d8dc2540195bc570d88af1a8fd8a1774373660a27bb1d539551f46824bc1  mesa-17.2.1.tar.gz
+77385d17827cff24a3bae134342234f2efe7f7f990e778109682571dbbc9ba1e  mesa-17.2.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100613">Bug 100613</a> - Regression in Mesa 17 on s390x (zSystems)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101709">Bug 101709</a> - [llvmpipe] piglit gl-1.0-scissor-offscreen regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102454">Bug 102454</a> - glibc 2.26 doesn't provide anymore xlocale.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102467">Bug 102467</a> - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102502">Bug 102502</a> - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bas Nieuwenhuizen (4):</p>
+<ul>
+  <li>radv: Actually set the cmd_buffer usage_flags.</li>
+  <li>radv: Fix vkCopyImage with both depth and stencil aspects.</li>
+  <li>radv: Disable multilayer &amp; multilevel DCC.</li>
+  <li>radv: Don't allocate CMASK for linear images.</li>
+</ul>
+
+<p>Ben Crocker (1):</p>
+<ul>
+  <li>llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>llvmpipe: initialize llvmpipe-&gt;dirty with LP_NEW_SCISSOR</li>
+</ul>
+
+<p>Charmaine Lee (1):</p>
+<ul>
+  <li>vbo: fix offset in minmax cache key</li>
+</ul>
+
+<p>Dave Airlie (12):</p>
+<ul>
+  <li>radv: disable 1d/2d linear optimisation on gfx9.</li>
+  <li>radv/gfx9: set descriptor up for base_mip to level range.</li>
+  <li>Revert "radv: disable support for VEGA for now."</li>
+  <li>radv/winsys: use amdgpu_bo_va_op_raw.</li>
+  <li>radv/gfx9: allocate events from uncached VA space</li>
+  <li>radv: use simpler indirect packet 3 if possible.</li>
+  <li>radv: don't use iview for meta image width/height.</li>
+  <li>radv: handle GFX9 1D textures</li>
+  <li>radv/gfx9: set mip0-depth correctly for 2d arrays/3d images</li>
+  <li>radv/ac: bump params array for image atomic comp swap</li>
+  <li>radv/gfx9: fix image resource handling.</li>
+  <li>radv/winsys: fix flags vs va_flags thinko.</li>
+</ul>
+
+<p>Emil Velikov (7):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.0</li>
+  <li>cherry-ignore: add getCapability patches</li>
+  <li>cherry-ignore: ignore gfx9 tile swizzle fix</li>
+  <li>cherry-ignore: add execution_type() fix to the list</li>
+  <li>cherry-ignore: add EGL+gbm swast patches</li>
+  <li>egl/x11/dri3: adding missing __DRI_BACKGROUND_CALLABLE extension</li>
+  <li>Update version to 17.2.1</li>
+</ul>
+
+<p>Eric Engestrom (3):</p>
+<ul>
+  <li>util: improve compiler guard</li>
+  <li>mesa/st: remove unwanted backup file</li>
+  <li>docs/egl: remove reference to EGL_DRIVERS_PATH</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>radv: don't assert on empty hash table</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>anv/formats: Nicely handle unknown VkFormat enums</li>
+  <li>spirv: Add support for the HelperInvocation builtin</li>
+</ul>
+
+<p>Karol Herbst (1):</p>
+<ul>
+  <li>nvc0: write 0 to pipeline_statistics.cs_invocations</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Fix crash in fallback GTT mapping.</li>
+  <li>i965: Set "Subslice Hashing Mode" to 16x16 on Apollolake.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: skip draw calls with pipe_draw_info::count == 0</li>
+</ul>
+
+<p>Michael Olbrich (1):</p>
+<ul>
+  <li>egl/dri2: only destroy created objects</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>radeonsi: apply a mask to gl_SampleMaskIn in the PS prolog</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi/gfx9: always flush DB metadata on framebuffer changes</li>
+  <li>st/glsl_to_tgsi: only the first (inner-most) array reference can be a 2D index</li>
+  <li>ac/surface: match Z and stencil tile config</li>
+  <li>glsl: fix glsl_struct_field size calculations for shader cache</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gallivm: correct channel shift logic on big endian</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: skip batch-cache for compute shaders</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>st/mesa: fix view template initialization in try_pbo_readpixels</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radeonsi: update dirty_level_mask before dispatching</li>
+</ul>
+
+<p>Timothy Arceri (9):</p>
+<ul>
+  <li>glsl: allow NULL to be passed to encode_type_to_blob()</li>
+  <li>glsl: stop adding pointers from gl_shader_variable to the cache</li>
+  <li>glsl: stop adding pointers from glsl_struct_field to the cache</li>
+  <li>glsl: add has_uniform_storage() helper to shader cache</li>
+  <li>glsl: don't write uniform storage offset if there isn't one</li>
+  <li>glsl: always write a name/label string to the cache</li>
+  <li>compiler: move pointers to the start of shader_info</li>
+  <li>glsl: stop adding pointers from shader_info to the cache</li>
+  <li>glsl: stop adding pointers from bindless structs to the cache</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.2.html
+++ b/docs/relnotes/17.2.2.html
@@ -0,0 +1,202 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.2 Release Notes / October 2, 2017</h1>
+
+<p>
+Mesa 17.2.2 is a bug fix release which fixes bugs found since the 17.2.1 release.
+</p>
+<p>
+Mesa 17.2.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102573">Bug 102573</a> - fails to build on armel</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102844">Bug 102844</a> - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102847">Bug 102847</a> - swr fail to build with llvm-5.0.0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102904">Bug 102904</a> - piglit and gl45 cts linker tests regressed</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alexandru-Liviu Prodea (1):</p>
+<ul>
+  <li>Scons: Add LLVM 5.0 support</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Check for GFX9 for 1D arrays in image_size intrinsic.</li>
+</ul>
+
+<p>Boris Brezillon (1):</p>
+<ul>
+  <li>broadcom/vc4: Fix infinite retry in vc4_bo_alloc()</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>radv/nir: call opt_remove_phis after trivial continues.</li>
+  <li>ac/surface: handle S8 on gfx9</li>
+  <li>st/glsl-&gt;tgsi: fix u64 to bool comparisons.</li>
+</ul>
+
+<p>David Airlie (1):</p>
+<ul>
+  <li>radv: add gfx9 scissor workaround</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.2.1</li>
+  <li>automake: enable libunwind in `make distcheck'</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>broadcom/vc4: Fix use-after-free for flushing when writing to a texture.</li>
+  <li>broadcom/vc4: Fix use-after-free trying to mix a quad and tile clear.</li>
+  <li>broadcom/vc4: Fix use-after-free when deleting a program.</li>
+  <li>broadcom/vc4: Keep pipe_sampler_view-&gt;texture matching the original texture.</li>
+</ul>
+
+<p>Gert Wollny (2):</p>
+<ul>
+  <li>travis: force llvm-3.3 for "make Gallium ST Other"</li>
+  <li>travis: Add libunwind-dev to gallium/make builds</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>configure: check if -latomic is needed for __atomic_*</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>nv20: Fix GL_CLAMP</li>
+</ul>
+
+<p>Jason Ekstrand (6):</p>
+<ul>
+  <li>i965/blorp: Set r8stencil_needs_update when writing stencil</li>
+  <li>vulkan/wsi/wayland: Stop printing out the DRM device</li>
+  <li>vulkan/wsi/wayland: Refactor wsi_wl_display code</li>
+  <li>vulkan/wsi/wayland: Stop caching Wayland displays</li>
+  <li>vulkan/wsi/wayland: Copy wl_proxy objects from oldSwapchain if available</li>
+  <li>vulkan/wsi/wayland: Return better error messages</li>
+</ul>
+
+<p>Juan A. Suarez Romero (4):</p>
+<ul>
+  <li>cherry-ignore: add "radeonsi/gfx9: proper workaround for LS/HS VGPR initialization bug"</li>
+  <li>cherry-ignore: add "radv: Check for GFX9 for 1D arrays in image_size intrinsic."</li>
+  <li>cherry-ignore: add "radv: copy the number of viewports/scissors at pipeline bind time"</li>
+  <li>Update version to 17.2.2</li>
+</ul>
+
+<p>Józef Kucia (1):</p>
+<ul>
+  <li>anv: Fix descriptors copying</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965/vec4: Actually handle atomic op intrinsics.</li>
+  <li>i965/vec4: Fix swizzles on atomic sources.</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>st/va/postproc: use video original size for postprocessing</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>etnaviv: fix 16bpp clears</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>util: Link libmesautil into u_atomic_test</li>
+  <li>util/u_atomic: Add implementation of __sync_val_compare_and_swap_8</li>
+</ul>
+
+<p>Nicolai Hähnle (9):</p>
+<ul>
+  <li>radeonsi: workaround for gather4 on integer cube maps</li>
+  <li>amd/common: round cube array slice in ac_prepare_cube_coords</li>
+  <li>amd/common: add workaround for cube map array layer clamping</li>
+  <li>glsl/linker: fix output variable overlap check</li>
+  <li>radeonsi: fix array textures layer coordinate</li>
+  <li>radeonsi: set MIP_POINT_PRECLAMP to 0</li>
+  <li>amd/addrlib: fix missing va_end() after va_copy()</li>
+  <li>amd/common: move ac_build_phi from radeonsi</li>
+  <li>radeonsi: fix a regression in integer cube map handling</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (1):</p>
+<ul>
+  <li>anv: fix viewport transformation for z component</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: fix saved compute state when doing statistics/occlusion queries</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: free current ComputeProgram state in _mesa_free_context_data</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>swr/rast: remove llvm fence/atomics from generated files</li>
+</ul>
+
+<p>Tomasz Figa (1):</p>
+<ul>
+  <li>egl/dri2: Implement swapInterval fallback in a conformant way</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -157,6 +157,19 @@ def check_header(env, header):
    env = conf.Finish()
    return have_header

+def check_functions(env, functions):
+    '''Check if all of the functions exist'''
+
+    conf = SCons.Script.Configure(env)
+    have_functions = True
+
+    for function in functions:
+        if not conf.CheckFunc(function):
+            have_functions = False
+
+    env = conf.Finish()
+    return have_functions
+
 def check_prog(env, prog):
    """Check whether this program exists."""

@@ -339,6 +352,9 @@ def generate(env):
        if check_header(env, 'xlocale.h'):
            cppdefines += ['HAVE_XLOCALE_H']

+        if check_functions(env, ['strtod_l', 'strtof_l']):
+            cppdefines += ['HAVE_STRTOD_L']
+
    if platform == 'windows':
        cppdefines += [
            'WIN32',
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -104,7 +104,26 @@ def generate(env):
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
-        if llvm_version >= distutils.version.LooseVersion('4.0'):
+        if llvm_version >= distutils.version.LooseVersion('5.0'):
+            env.Prepend(LIBS = [
+                'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
+                'LLVMDebugInfoCodeView', 'LLVMCodeGen',
+                'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMTransformUtils',
+                'LLVMBitWriter', 'LLVMX86Desc',
+                'LLVMMCDisassembler', 'LLVMX86Info',
+                'LLVMX86AsmPrinter', 'LLVMX86Utils',
+                'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
+                'LLVMAnalysis', 'LLVMProfileData',
+                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
+                'LLVMBitReader', 'LLVMMC', 'LLVMCore',
+                'LLVMSupport',
+                'LLVMIRReader', 'LLVMAsmParser',
+                'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
+                'LLVMBinaryFormat',
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('4.0'):
            env.Prepend(LIBS = [
                'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
--- a/src/amd/addrlib/core/addrobject.cpp
+++ b/src/amd/addrlib/core/addrobject.cpp
@@ -216,20 +216,16 @@ VOID Object::DebugPrint(
 #if DEBUG
    if (m_client.callbacks.debugPrint != NULL)
    {
-        va_list ap;
-
-        va_start(ap, pDebugString);
-
        ADDR_DEBUGPRINT_INPUT debugPrintInput = {0};

        debugPrintInput.size         = sizeof(ADDR_DEBUGPRINT_INPUT);
        debugPrintInput.pDebugString = const_cast<CHAR*>(pDebugString);
        debugPrintInput.hClient      = m_client.handle;
-        va_copy(debugPrintInput.ap, ap);
+        va_start(debugPrintInput.ap, pDebugString);

        m_client.callbacks.debugPrint(&debugPrintInput);

-        va_end(ap);
+        va_end(debugPrintInput.ap);
    }
 #endif
 }
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -45,10 +45,13 @@
 * The caller is responsible for initializing ctx::module and ctx::builder.
 */
 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+		     enum chip_class chip_class)
 {
 	LLVMValueRef args[1];

+	ctx->chip_class = chip_class;
+
 	ctx->context = context;
 	ctx->module = NULL;
 	ctx->builder = NULL;
@@ -176,6 +179,20 @@ void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
 	}
 }

+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+	LLVMAddIncoming(phi, values, blocks, count_incoming);
+	return phi;
+}
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
@@ -312,7 +329,7 @@ static void build_cube_select(LLVMBuilderRef builder,

 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
-		       bool is_deriv, bool is_array,
+		       bool is_deriv, bool is_array, bool is_lod,
 		       LLVMValueRef *coords_arg,
 		       LLVMValueRef *derivs_arg)
 {
@@ -322,6 +339,38 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 	LLVMValueRef coords[3];
 	LLVMValueRef invma;

+	if (is_array && !is_lod) {
+		LLVMValueRef tmp = coords_arg[3];
+		tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+
+		/* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+		 *
+		 *    "For Array forms, the array layer used will be
+		 *
+		 *       max(0, min(d−1, floor(layer+0.5)))
+		 *
+		 *     where d is the depth of the texture array and layer
+		 *     comes from the component indicated in the tables below.
+		 *     Workaround for an issue where the layer is taken from a
+		 *     helper invocation which happens to fall on a different
+		 *     layer due to extrapolation."
+		 *
+		 * VI and earlier attempt to implement this in hardware by
+		 * clamping the value of coords[2] = (8 * layer) + face.
+		 * Unfortunately, this means that we end up with the wrong
+		 * face when clamping occurs.
+		 *
+		 * Clamp the layer earlier to work around the issue.
+		 */
+		if (ctx->chip_class <= VI) {
+			LLVMValueRef ge0;
+			ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
+			tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
+		}
+
+		coords_arg[3] = tmp;
+	}
+
 	build_cube_intrinsic(ctx, coords_arg, &selcoords);

 	invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -28,6 +28,8 @@
 #include <stdbool.h>
 #include <llvm-c/TargetMachine.h>

+#include "amd_family.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -61,10 +63,13 @@ struct ac_llvm_context {
 	unsigned fpmath_md_kind;
 	LLVMValueRef fpmath_md_2p5_ulp;
 	LLVMValueRef empty_md;
+
+	enum chip_class chip_class;
 };

 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context);
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+		     enum chip_class chip_class);

 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
@@ -73,6 +78,11 @@ ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,

 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);

+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks);
+
 LLVMValueRef
 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 				LLVMValueRef *values,
@@ -91,7 +101,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,

 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
-		       bool is_deriv, bool is_array,
+		       bool is_deriv, bool is_array, bool is_lod,
 		       LLVMValueRef *coords_arg,
 		       LLVMValueRef *derivs_arg);

--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3309,13 +3309,13 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,

 	int count;
 	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
+	bool is_array = glsl_sampler_type_is_array(type);
 	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
 			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
 	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
 		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
-
-	count = image_type_to_components_count(dim,
-					       glsl_sampler_type_is_array(type));
+	bool gfx9_1d = ctx->options->chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
+	count = image_type_to_components_count(dim, is_array);

 	if (is_ms) {
 		LLVMValueRef fmask_load_address[3];
@@ -3323,7 +3323,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,

 		fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
 		fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
-		if (glsl_sampler_type_is_array(type))
+		if (is_array)
 			fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
 		else
 			fmask_load_address[2] = NULL;
@@ -3338,7 +3338,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
 							       sample_index,
 							       get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
 	}
-	if (count == 1) {
+	if (count == 1 && !gfx9_1d) {
 		if (instr->src[0].ssa->num_components)
 			res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
 		else
@@ -3348,13 +3348,22 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
 		if (is_ms)
 			count--;
 		for (chan = 0; chan < count; ++chan) {
-			coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
+			coords[chan] = llvm_extract_elem(ctx, src0, chan);
 		}
-
 		if (add_frag_pos) {
 			for (chan = 0; chan < count; ++chan)
 				coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
 		}
+
+		if (gfx9_1d) {
+			if (is_array) {
+				coords[2] = coords[1];
+				coords[1] = ctx->ac.i32_0;
+			} else
+				coords[1] = ctx->ac.i32_0;
+			count++;
+		}
+
 		if (is_ms) {
 			coords[count] = sample_index;
 			count++;
@@ -3490,7 +3499,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
                                       const nir_intrinsic_instr *instr)
 {
-	LLVMValueRef params[6];
+	LLVMValueRef params[7];
 	int param_count = 0;
 	const nir_variable *var = instr->variables[0]->var;

@@ -3591,14 +3600,23 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,

 	res = ac_build_image_opcode(&ctx->ac, &args);

+	LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
+
 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
 	    glsl_sampler_type_is_array(type)) {
-		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
 		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
 	}
+	if (ctx->options->chip_class >= GFX9 &&
+	    glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
+	    glsl_sampler_type_is_array(type)) {
+		LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, res, two, "");
+		res = LLVMBuildInsertElement(ctx->builder, res, layers,
+						ctx->ac.i32_1, "");
+
+	}
 	return res;
 }

@@ -4455,36 +4473,50 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)

 	/* pack derivatives */
 	if (ddx || ddy) {
+		int num_src_deriv_channels, num_dest_deriv_channels;
 		switch (instr->sampler_dim) {
 		case GLSL_SAMPLER_DIM_3D:
 		case GLSL_SAMPLER_DIM_CUBE:
 			num_deriv_comp = 3;
+			num_src_deriv_channels = 3;
+			num_dest_deriv_channels = 3;
 			break;
 		case GLSL_SAMPLER_DIM_2D:
 		default:
+			num_src_deriv_channels = 2;
+			num_dest_deriv_channels = 2;
 			num_deriv_comp = 2;
 			break;
 		case GLSL_SAMPLER_DIM_1D:
-			num_deriv_comp = 1;
+			num_src_deriv_channels = 1;
+			if (ctx->options->chip_class >= GFX9) {
+				num_dest_deriv_channels = 2;
+				num_deriv_comp = 2;
+			} else {
+				num_dest_deriv_channels = 1;
+				num_deriv_comp = 1;
+			}
 			break;
 		}

-		for (unsigned i = 0; i < num_deriv_comp; i++) {
+		for (unsigned i = 0; i < num_src_deriv_channels; i++) {
 			derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i));
-			derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
+			derivs[num_dest_deriv_channels + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
+		}
+		for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
+			derivs[i] = ctx->ac.f32_0;
+			derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
 		}
 	}

 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
-		if (instr->is_array && instr->op != nir_texop_lod)
-			coords[3] = apply_round_slice(ctx, coords[3]);
 		for (chan = 0; chan < instr->coord_components; chan++)
 			coords[chan] = to_float(&ctx->ac, coords[chan]);
 		if (instr->coord_components == 3)
 			coords[3] = LLVMGetUndef(ctx->f32);
 		ac_prepare_cube_coords(&ctx->ac,
 			instr->op == nir_texop_txd, instr->is_array,
-			coords, derivs);
+			instr->op == nir_texop_lod, coords, derivs);
 		if (num_deriv_comp)
 			num_deriv_comp--;
 	}
@@ -4512,6 +4544,23 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 			}
 			address[count++] = coords[2];
 		}
+
+		if (ctx->options->chip_class >= GFX9) {
+			LLVMValueRef filler;
+			if (instr->op == nir_texop_txf)
+				filler = ctx->ac.i32_0;
+			else
+				filler = LLVMConstReal(ctx->f32, 0.5);
+
+			if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
+				if (instr->is_array) {
+					address[count] = address[count - 1];
+					address[count - 1] = filler;
+					count++;
+				} else
+					address[count++] = filler;
+			}
+		}
 	}

 	/* Pack LOD */
@@ -4606,6 +4655,14 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
+	} else if (ctx->options->chip_class >= GFX9 &&
+		   instr->op == nir_texop_txs &&
+		   instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+		   instr->is_array) {
+		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
+		LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, result, two, "");
+		result = LLVMBuildInsertElement(ctx->builder, result, layers,
+						ctx->ac.i32_1, "");
 	} else if (instr->dest.ssa.num_components != 4)
 		result = trim_vector(ctx, result, instr->dest.ssa.num_components);

@@ -6104,7 +6161,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	ctx.context = LLVMContextCreate();
 	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);

-	ac_llvm_context_init(&ctx.ac, ctx.context);
+	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
 	ctx.ac.module = ctx.module;

 	ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
@@ -6438,7 +6495,7 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
 	ctx.options = options;
 	ctx.shader_info = shader_info;

-	ac_llvm_context_init(&ctx.ac, ctx.context);
+	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class);
 	ctx.ac.module = ctx.module;

 	ctx.is_gs_copy_shader = true;
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -553,15 +553,35 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 	AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
 	AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;

-	/* noStencil = 0 can result in a depth part that is incompatible with
-	 * mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in
-	 * this case, we may end up setting stencil_adjusted).
+	/* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
+	 * for Z and stencil. This can cause a number of problems which we work
+	 * around here:
 	 *
-	 * TODO: update addrlib to a newer version, remove this, and
-	 * use flags.matchStencilTileCfg = 1 as an alternative fix.
+	 * - a depth part that is incompatible with mipmapped texturing
+	 * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
+	 *   incorrect tiling applied to the stencil part, stencil buffer
+	 *   memory accesses that go out of bounds) even without mipmapping
+	 *
+	 * Some piglit tests that are prone to different types of related
+	 * failures:
+	 *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
+	 *  ./bin/framebuffer-blit-levels {draw,read} stencil
+	 *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
+	 *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
+	 *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
 	 */
-	if (config->info.levels > 1)
+	int stencil_tile_idx = -1;
+
+	if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
+	    (config->info.levels > 1 || info->family == CHIP_STONEY)) {
+		/* Compute stencilTileIdx that is compatible with the (depth)
+		 * tileIdx. This degrades the depth surface if necessary to
+		 * ensure that a matching stencilTileIdx exists. */
+		AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
+
+		/* Keep the depth mip-tail compatible with texturing. */
 		AddrSurfInfoIn.flags.noStencil = 1;
+	}

 	/* Set preferred macrotile parameters. This is usually required
 	 * for shared resources. This is for 2D tiling only. */
@@ -643,12 +663,33 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 			if (level > 0)
 				continue;

+			/* Check that we actually got a TC-compatible HTILE if
+			 * we requested it (only for level 0, since we're not
+			 * supporting HTILE on higher mip levels anyway). */
+			assert(AddrSurfInfoOut.tcCompatible ||
+			       !AddrSurfInfoIn.flags.tcCompatible ||
+			       AddrSurfInfoIn.flags.matchStencilTileCfg);
+
+			if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
+				if (!AddrSurfInfoOut.tcCompatible) {
+					AddrSurfInfoIn.flags.tcCompatible = 0;
+					surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
+				}
+
+				AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
+				AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
+				stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
+
+				assert(stencil_tile_idx >= 0);
+			}
+
 			gfx6_surface_settings(info, &AddrSurfInfoOut, surf);
 		}
 	}

 	/* Calculate texture layout information for stencil. */
 	if (surf->flags & RADEON_SURF_SBUFFER) {
+		AddrSurfInfoIn.tileIndex = stencil_tile_idx;
 		AddrSurfInfoIn.bpp = 8;
 		AddrSurfInfoIn.flags.depth = 0;
 		AddrSurfInfoIn.flags.stencil = 1;
@@ -1042,9 +1083,16 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,

 	/* Calculate texture layout information for stencil. */
 	if (surf->flags & RADEON_SURF_SBUFFER) {
-		AddrSurfInfoIn.bpp = 8;
-		AddrSurfInfoIn.flags.depth = 0;
 		AddrSurfInfoIn.flags.stencil = 1;
+		AddrSurfInfoIn.bpp = 8;
+
+		if (!AddrSurfInfoIn.flags.depth) {
+			r = gfx9_get_preferred_swizzle_mode(addrlib, &AddrSurfInfoIn, false,
+							    &AddrSurfInfoIn.swizzleMode);
+			if (r)
+				return r;
+		} else
+			AddrSurfInfoIn.flags.depth = 0;

 		r = gfx9_compute_miptree(addrlib, surf, compressed, &AddrSurfInfoIn);
 		if (r)
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -938,6 +938,11 @@ static void
 radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
 {
 	uint32_t count = cmd_buffer->state.dynamic.scissor.count;
+
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+		si_emit_cache_flush(cmd_buffer);
+	}
 	si_write_scissors(cmd_buffer->cs, 0, count,
 			  cmd_buffer->state.dynamic.scissor.scissors,
 			  cmd_buffer->state.dynamic.viewport.viewports,
@@ -1984,6 +1989,7 @@ VkResult radv_BeginCommandBuffer(

 	memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
 	cmd_buffer->state.last_primitive_reset_en = -1;
+	cmd_buffer->usage_flags = pBeginInfo->flags;

 	/* setup initial configuration into command buffer */
 	if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
@@ -2788,20 +2794,30 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
 	radeon_emit(cs, indirect_va);
 	radeon_emit(cs, indirect_va >> 32);

-	radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
-				       PKT3_DRAW_INDIRECT_MULTI,
-			     8, false));
-	radeon_emit(cs, 0);
-	radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) |
-	                S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
-	                S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
-	radeon_emit(cs, draw_count); /* count */
-	radeon_emit(cs, count_va); /* count_addr */
-	radeon_emit(cs, count_va >> 32);
-	radeon_emit(cs, stride); /* stride */
-	radeon_emit(cs, di_src_sel);
+	if (draw_count == 1 && !count_va && !draw_id_enable) {
+		radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT :
+				     PKT3_DRAW_INDIRECT, 3, false));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, di_src_sel);
+	} else {
+		radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
+				     PKT3_DRAW_INDIRECT_MULTI,
+				     8, false));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
+		radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) |
+			    S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
+			    S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+		radeon_emit(cs, draw_count); /* count */
+		radeon_emit(cs, count_va); /* count_addr */
+		radeon_emit(cs, count_va >> 32);
+		radeon_emit(cs, stride); /* stride */
+		radeon_emit(cs, di_src_sel);
+	}
+
 	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -2813,7 +2813,7 @@ VkResult radv_CreateEvent(

 	event->bo = device->ws->buffer_create(device->ws, 8, 8,
 					      RADEON_DOMAIN_GTT,
-					      RADEON_FLAG_CPU_ACCESS);
+					      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS);
 	if (!event->bo) {
 		vk_free2(&device->alloc, pAllocator, event);
 		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -3115,8 +3115,8 @@ radv_initialise_color_surface(struct radv_device *device,
 	}

 	if (device->physical_device->rad_info.chip_class >= GFX9) {
-		uint32_t max_slice = radv_surface_layer_count(iview);
-		unsigned mip0_depth = iview->base_layer + max_slice - 1;
+		unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
+		  (iview->extent.depth - 1) : (iview->image->info.array_size - 1);

 		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
 		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -34,7 +34,7 @@
 #include "util/debug.h"
 #include "util/u_atomic.h"
 static unsigned
-radv_choose_tiling(struct radv_device *Device,
+radv_choose_tiling(struct radv_device *device,
 		   const struct radv_image_create_info *create_info)
 {
 	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
@@ -45,14 +45,15 @@ radv_choose_tiling(struct radv_device *Device,
 	}

 	if (!vk_format_is_compressed(pCreateInfo->format) &&
-	    !vk_format_is_depth_or_stencil(pCreateInfo->format)) {
+	    !vk_format_is_depth_or_stencil(pCreateInfo->format)
+	    && device->physical_device->rad_info.chip_class <= VI) {
+		/* Linear tiling causes hangs in some VK CTS tests on GFX9. */
 		/* Textures with a very small height are recommended to be linear. */
 		if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
 		    /* Only very thin and long 2D textures should benefit from
 		     * linear_aligned. */
 		    (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
 			return RADEON_SURF_MODE_LINEAR_ALIGNED;
-
 	}

 	/* MSAA resources must be 2D tiled. */
@@ -119,6 +120,7 @@ radv_init_surface(struct radv_device *device,
 	                           VK_IMAGE_USAGE_STORAGE_BIT)) ||
 	    (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
            (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
+            pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
            device->physical_device->rad_info.chip_class < VI ||
            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
            !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
@@ -279,10 +281,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
 }

 static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
-			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image)
+			     unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
 {
 	if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
 		return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
+
+	/* GFX9 allocates 1D textures as 2D. */
+	if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
+		image_type = VK_IMAGE_TYPE_2D;
 	switch (image_type) {
 	case VK_IMAGE_TYPE_1D:
 		return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
@@ -373,7 +379,7 @@ si_make_texture_descriptor(struct radv_device *device,
 	}

 	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
-			    is_storage_image);
+			    is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
 	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
 	        height = 1;
 		depth = image->info.array_size;
@@ -494,7 +500,7 @@ si_make_texture_descriptor(struct radv_device *device,
 			S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
-			S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false));
+			S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
 		fmask_state[4] = 0;
 		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
 		fmask_state[6] = 0;
@@ -832,8 +838,10 @@ radv_image_create(VkDevice _device,

 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
 	    pCreateInfo->mipLevels == 1 &&
-	    !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc)
+	    !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc &&
+	    !image->surface.is_linear)
 		radv_image_alloc_cmask(device, image);
+
 	if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
 		radv_image_alloc_fmask(device, image);
 	} else if (vk_format_is_depth(pCreateInfo->format)) {
@@ -870,6 +878,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
 	uint32_t blk_w;
 	uint32_t *descriptor;
 	uint32_t *fmask_descriptor;
+	uint32_t hw_level = 0;

 	if (is_storage_image) {
 		descriptor = iview->storage_descriptor;
@@ -882,11 +891,13 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
 	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
 	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);

+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		hw_level = iview->base_mip;
 	si_make_texture_descriptor(device, image, is_storage_image,
 				   iview->type,
 				   iview->vk_format,
 				   components,
-				   0, iview->level_count - 1,
+				   hw_level, hw_level + iview->level_count - 1,
 				   iview->base_layer,
 				   iview->base_layer + iview->layer_count - 1,
 				   iview->extent.width,
@@ -1043,23 +1054,34 @@ radv_DestroyImage(VkDevice _device, VkImage _image,
 }

 void radv_GetImageSubresourceLayout(
-	VkDevice                                    device,
+	VkDevice                                    _device,
 	VkImage                                     _image,
 	const VkImageSubresource*                   pSubresource,
 	VkSubresourceLayout*                        pLayout)
 {
 	RADV_FROM_HANDLE(radv_image, image, _image);
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	int level = pSubresource->mipLevel;
 	int layer = pSubresource->arrayLayer;
 	struct radeon_surf *surface = &image->surface;

-	pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
-	pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
-	pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
-	pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
-	pLayout->size = surface->u.legacy.level[level].slice_size;
-	if (image->type == VK_IMAGE_TYPE_3D)
-		pLayout->size *= u_minify(image->info.depth, level);
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
+		pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
+		pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
+		pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
+		pLayout->size = surface->u.gfx9.surf_slice_size;
+		if (image->type == VK_IMAGE_TYPE_3D)
+			pLayout->size *= u_minify(image->info.depth, level);
+	} else {
+		pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
+		pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
+		pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
+		pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
+		pLayout->size = surface->u.legacy.level[level].slice_size;
+		if (image->type == VK_IMAGE_TYPE_3D)
+			pLayout->size *= u_minify(image->info.depth, level);
+	}
 }


--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -275,15 +275,20 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               VkFilter blit_filter)
 {
 	struct radv_device *device = cmd_buffer->device;
+	uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
+	uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
+	uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
+	uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
+	uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);

 	assert(src_image->info.samples == dest_image->info.samples);

 	float vertex_push_constants[5] = {
-		(float)src_offset_0.x / (float)src_iview->extent.width,
-		(float)src_offset_0.y / (float)src_iview->extent.height,
-		(float)src_offset_1.x / (float)src_iview->extent.width,
-		(float)src_offset_1.y / (float)src_iview->extent.height,
-		(float)src_offset_0.z / (float)src_iview->extent.depth,
+		(float)src_offset_0.x / (float)src_width,
+		(float)src_offset_0.y / (float)src_height,
+		(float)src_offset_1.x / (float)src_width,
+		(float)src_offset_1.y / (float)src_height,
+		(float)src_offset_0.z / (float)src_depth,
 	};

 	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
@@ -310,8 +315,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 					       .pAttachments = (VkImageView[]) {
 					       radv_image_view_to_handle(dest_iview),
 				       },
-				       .width = dest_iview->extent.width,
-				       .height = dest_iview->extent.height,
+				       .width = dst_width,
+				       .height = dst_height,
 				       .layers = 1,
 				}, &cmd_buffer->pool->alloc, &fb);
 	VkPipeline pipeline;
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -53,7 +53,8 @@ enum blit2d_src_type {
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
             struct radv_meta_blit2d_surf *surf,
-             struct radv_image_view *iview, VkFormat depth_format)
+             struct radv_image_view *iview, VkFormat depth_format,
+              VkImageAspectFlagBits aspects)
 {
 	VkFormat format;

@@ -69,7 +70,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
 					     .viewType = VK_IMAGE_VIEW_TYPE_2D,
 					     .format = format,
 					     .subresourceRange = {
-					     .aspectMask = surf->aspect_mask,
+					     .aspectMask = aspects,
 					     .baseMipLevel = surf->level,
 					     .levelCount = 1,
 					     .baseArrayLayer = surf->layer,
@@ -111,7 +112,8 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
                struct radv_meta_blit2d_surf *src_img,
                struct radv_meta_blit2d_buffer *src_buf,
                struct blit2d_src_temps *tmp,
-                enum blit2d_src_type src_type, VkFormat depth_format)
+                enum blit2d_src_type src_type, VkFormat depth_format,
+                VkImageAspectFlagBits aspects)
 {
 	struct radv_device *device = cmd_buffer->device;

@@ -138,7 +140,7 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
 				      VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
 				      &src_buf->pitch);
 	} else {
-		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format);
+		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);

 		radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
 					      device->meta_state.blit2d.p_layouts[src_type],
@@ -175,9 +177,10 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
                uint32_t width,
                uint32_t height,
 		VkFormat depth_format,
-                struct blit2d_dst_temps *tmp)
+                struct blit2d_dst_temps *tmp,
+                VkImageAspectFlagBits aspects)
 {
-	create_iview(cmd_buffer, dst, &tmp->iview, depth_format);
+	create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);

 	radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
 			       &(VkFramebufferCreateInfo) {
@@ -250,106 +253,111 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;

 	for (unsigned r = 0; r < num_rects; ++r) {
-		VkFormat depth_format = 0;
-		if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
-			depth_format = vk_format_stencil_only(dst->image->vk_format);
-		else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
-			depth_format = vk_format_depth_only(dst->image->vk_format);
-		struct blit2d_src_temps src_temps;
-		blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);
+		unsigned i;
+		for_each_bit(i, dst->aspect_mask) {
+			unsigned aspect_mask = 1u << i;
+			VkFormat depth_format = 0;
+			if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+				depth_format = vk_format_stencil_only(dst->image->vk_format);
+			else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
+				depth_format = vk_format_depth_only(dst->image->vk_format);
+			struct blit2d_src_temps src_temps;
+			blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, aspect_mask);

-		struct blit2d_dst_temps dst_temps;
-		blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
-				rects[r].dst_y + rects[r].height, depth_format, &dst_temps);
+			struct blit2d_dst_temps dst_temps;
+			blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
+					rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);

-		float vertex_push_constants[4] = {
-			rects[r].src_x,
-			rects[r].src_y,
-			rects[r].src_x + rects[r].width,
-			rects[r].src_y + rects[r].height,
-		};
+			float vertex_push_constants[4] = {
+				rects[r].src_x,
+				rects[r].src_y,
+				rects[r].src_x + rects[r].width,
+				rects[r].src_y + rects[r].height,
+			};

-		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-				      device->meta_state.blit2d.p_layouts[src_type],
-				      VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
-				      vertex_push_constants);
+			radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+					device->meta_state.blit2d.p_layouts[src_type],
+					VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
+					vertex_push_constants);

-		if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-			unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
+			if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
+				unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);

-			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-						      &(VkRenderPassBeginInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.blit2d.render_passes[fs_key],
-								      .framebuffer = dst_temps.fb,
-								      .renderArea = {
-								      .offset = { rects[r].dst_x, rects[r].dst_y, },
-								      .extent = { rects[r].width, rects[r].height },
-							      },
-								      .clearValueCount = 0,
-									       .pClearValues = NULL,
-									       }, VK_SUBPASS_CONTENTS_INLINE);
+				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+							&(VkRenderPassBeginInfo) {
+								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+									.renderPass = device->meta_state.blit2d.render_passes[fs_key],
+									.framebuffer = dst_temps.fb,
+									.renderArea = {
+									.offset = { rects[r].dst_x, rects[r].dst_y, },
+									.extent = { rects[r].width, rects[r].height },
+								},
+									.clearValueCount = 0,
+										.pClearValues = NULL,
+										}, VK_SUBPASS_CONTENTS_INLINE);


-			bind_pipeline(cmd_buffer, src_type, fs_key);
-		} else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
-			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-						      &(VkRenderPassBeginInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.blit2d.depth_only_rp,
-								      .framebuffer = dst_temps.fb,
-								      .renderArea = {
-								      .offset = { rects[r].dst_x, rects[r].dst_y, },
-								      .extent = { rects[r].width, rects[r].height },
-							      },
-								      .clearValueCount = 0,
-									       .pClearValues = NULL,
-									       }, VK_SUBPASS_CONTENTS_INLINE);
+				bind_pipeline(cmd_buffer, src_type, fs_key);
+			} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+							&(VkRenderPassBeginInfo) {
+								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+									.renderPass = device->meta_state.blit2d.depth_only_rp,
+									.framebuffer = dst_temps.fb,
+									.renderArea = {
+									.offset = { rects[r].dst_x, rects[r].dst_y, },
+									.extent = { rects[r].width, rects[r].height },
+								},
+									.clearValueCount = 0,
+										.pClearValues = NULL,
+										}, VK_SUBPASS_CONTENTS_INLINE);


-			bind_depth_pipeline(cmd_buffer, src_type);
+				bind_depth_pipeline(cmd_buffer, src_type);

-		} else if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
-			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-						      &(VkRenderPassBeginInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-								      .renderPass = device->meta_state.blit2d.stencil_only_rp,
-								      .framebuffer = dst_temps.fb,
-								      .renderArea = {
-								      .offset = { rects[r].dst_x, rects[r].dst_y, },
-								      .extent = { rects[r].width, rects[r].height },
-							      },
-								      .clearValueCount = 0,
-									       .pClearValues = NULL,
-									       }, VK_SUBPASS_CONTENTS_INLINE);
+			} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+							&(VkRenderPassBeginInfo) {
+								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+									.renderPass = device->meta_state.blit2d.stencil_only_rp,
+									.framebuffer = dst_temps.fb,
+									.renderArea = {
+									.offset = { rects[r].dst_x, rects[r].dst_y, },
+									.extent = { rects[r].width, rects[r].height },
+								},
+									.clearValueCount = 0,
+										.pClearValues = NULL,
+										}, VK_SUBPASS_CONTENTS_INLINE);


-			bind_stencil_pipeline(cmd_buffer, src_type);
+				bind_stencil_pipeline(cmd_buffer, src_type);
+			} else
+				unreachable("Processing blit2d with multiple aspects.");
+
+			radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
+				.x = rects[r].dst_x,
+				.y = rects[r].dst_y,
+				.width = rects[r].width,
+				.height = rects[r].height,
+				.minDepth = 0.0f,
+				.maxDepth = 1.0f
+			});
+
+			radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
+				.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
+				.extent = (VkExtent2D) { rects[r].width, rects[r].height },
+			});
+
+
+
+			radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+			radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+
+			/* At the point where we emit the draw call, all data from the
+			* descriptor sets, etc. has been used.  We are free to delete it.
+			*/
+			blit2d_unbind_dst(cmd_buffer, &dst_temps);
 		}
-
-		radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
-			.x = rects[r].dst_x,
-			.y = rects[r].dst_y,
-			.width = rects[r].width,
-			.height = rects[r].height,
-			.minDepth = 0.0f,
-			.maxDepth = 1.0f
-		});
-
-		radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
-			.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
-			.extent = (VkExtent2D) { rects[r].width, rects[r].height },
-		});
-
-
-
-		radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
-		radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
-
-		/* At the point where we emit the draw call, all data from the
-		 * descriptor sets, etc. has been used.  We are free to delete it.
-		 */
-		blit2d_unbind_dst(cmd_buffer, &dst_temps);
 	}
 }

--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -1176,6 +1176,9 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 {
 	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	struct radv_image_view iview;
+	uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
+	uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
+
 	radv_image_view_init(&iview, cmd_buffer->device,
 			     &(VkImageViewCreateInfo) {
 				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -1199,9 +1202,9 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 					       .pAttachments = (VkImageView[]) {
 					       radv_image_view_to_handle(&iview),
 				       },
-					       .width = iview.extent.width,
-							.height = iview.extent.height,
-							.layers = 1
+					       .width = width,
+					       .height = height,
+					       .layers = 1
 			       },
 			       &cmd_buffer->pool->alloc,
 			       &fb);
@@ -1257,8 +1260,8 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 						.renderArea = {
 						.offset = { 0, 0, },
 						.extent = {
-							.width = iview.extent.width,
-							.height = iview.extent.height,
+							.width = width,
+							.height = height,
 						},
 					},
 						.renderPass = pass,
@@ -1277,7 +1280,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 	VkClearRect clear_rect = {
 		.rect = {
 			.offset = { 0, 0 },
-			.extent = { iview.extent.width, iview.extent.height },
+			.extent = { width, height },
 		},
 		.baseArrayLayer = range->baseArrayLayer,
 		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -161,6 +161,7 @@ radv_optimize_nir(struct nir_shader *shader)
                if (nir_opt_trivial_continues(shader)) {
                        progress = true;
                        NIR_PASS(progress, shader, nir_copy_prop);
+                        NIR_PASS(progress, shader, nir_opt_remove_phis);
                        NIR_PASS(progress, shader, nir_opt_dce);
                }
                NIR_PASS(progress, shader, nir_opt_if);
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -118,6 +118,9 @@ radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
 	const uint32_t mask = cache->table_size - 1;
 	const uint32_t start = (*(uint32_t *) sha1);

+	if (cache->table_size == 0)
+		return NULL;
+
 	for (uint32_t i = 0; i < cache->table_size; i++) {
 		const uint32_t index = (start + i) & mask;
 		struct cache_entry *entry = cache->hash_table[index];
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -653,7 +653,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_compute_state saved_state;

-	radv_meta_save_compute(&saved_state, cmd_buffer, 4);
+	radv_meta_save_compute(&saved_state, cmd_buffer, 16);

 	struct radv_buffer dst_buffer = {
 		.bo = dst_bo,
@@ -737,7 +737,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 	                                RADV_CMD_FLAG_INV_VMEM_L1 |
 	                                RADV_CMD_FLAG_CS_PARTIAL_FLUSH;

-	radv_meta_restore_compute(&saved_state, cmd_buffer, 4);
+	radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
 }

 VkResult radv_CreateQueryPool(
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -51,7 +51,8 @@ enum radeon_bo_flag { /* bitfield */
 	RADEON_FLAG_GTT_WC =        (1 << 0),
 	RADEON_FLAG_CPU_ACCESS =    (1 << 1),
 	RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
-	RADEON_FLAG_VIRTUAL =       (1 << 3)
+	RADEON_FLAG_VIRTUAL =       (1 << 3),
+	RADEON_FLAG_VA_UNCACHED =   (1 << 4),
 };

 enum radeon_bo_usage { /* bitfield */
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -39,6 +39,23 @@

 static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

+static int
+radv_amdgpu_bo_va_op(amdgpu_device_handle dev,
+		     amdgpu_bo_handle bo,
+		     uint64_t offset,
+		     uint64_t size,
+		     uint64_t addr,
+		     uint64_t flags,
+		     uint32_t ops)
+{
+	size = ALIGN(size, getpagesize());
+	flags |= (AMDGPU_VM_PAGE_READABLE |
+		  AMDGPU_VM_PAGE_WRITEABLE |
+		  AMDGPU_VM_PAGE_EXECUTABLE);
+	return amdgpu_bo_va_op_raw(dev, bo, offset, size, addr,
+				   flags, ops);
+}
+
 static void
 radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
                               const struct radv_amdgpu_map_range *range)
@@ -49,8 +66,8 @@ radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
 		return; /* TODO: PRT mapping */

 	p_atomic_inc(&range->bo->ref_count);
-	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
-	                        range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
+	int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
+				     range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
 	if (r)
 		abort();
 }
@@ -64,8 +81,8 @@ radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
 	if (!range->bo)
 		return; /* TODO: PRT mapping */

-	int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
-	                        range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
+	int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
+				     range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
 	if (r)
 		abort();
 	radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
@@ -235,7 +252,7 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
 			bo->ws->num_buffers--;
 			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
 		}
-		amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
+		radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
 		amdgpu_bo_free(bo->bo);
 	}
 	amdgpu_va_range_free(bo->va_handle);
@@ -323,7 +340,11 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		goto error_bo_alloc;
 	}

-	r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
+
+	uint32_t va_flags = 0;
+	if (flags & RADEON_FLAG_VA_UNCACHED)
+		va_flags |= AMDGPU_VM_MTYPE_UC;
+	r = radv_amdgpu_bo_va_op(ws->dev, buf_handle, 0, size, va, va_flags, AMDGPU_VA_OP_MAP);
 	if (r)
 		goto error_va_map;

@@ -399,7 +420,7 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
 	if (r)
 		goto error_query;

-	r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
+	r = radv_amdgpu_bo_va_op(ws->dev, result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
 	if (r)
 		goto error_va_map;

--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -46,11 +46,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 	if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
 		return false;

-	if (ws->info.chip_class >= GFX9) {
-		fprintf(stderr, "radv: VEGA support not completed.\n");
-		return false;
-	}
-
 	/* LLVM 5.0 is required for GFX9. */
 	if (ws->info.chip_class >= GFX9 && HAVE_LLVM < 0x0500) {
 		fprintf(stderr, "amdgpu: LLVM 5.0 is required, got LLVM %i.%i\n",
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2661,12 +2661,14 @@ assign_attribute_or_color_locations(void *mem_ctx,
   } to_assign[32];
   assert(max_index <= 32);

-   /* Temporary array for the set of attributes that have locations assigned.
+   /* Temporary array for the set of attributes that have locations assigned,
+    * for the purpose of checking overlapping slots/components of (non-ES)
+    * fragment shader outputs.
    */
-   ir_variable *assigned[16];
+   ir_variable *assigned[12 * 4]; /* (max # of FS outputs) * # components */
+   unsigned assigned_attr = 0;

   unsigned num_attr = 0;
-   unsigned assigned_attr = 0;

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *const var = node->as_variable();
@@ -2905,6 +2907,18 @@ assign_attribute_or_color_locations(void *mem_ctx,
               }
            }

+            if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
+               /* Only track assigned variables for non-ES fragment shaders
+                * to avoid overflowing the array.
+                *
+                * At most one variable per fragment output component should
+                * reach this.
+                */
+               assert(assigned_attr < ARRAY_SIZE(assigned));
+               assigned[assigned_attr] = var;
+               assigned_attr++;
+            }
+
            used_locations |= (use_mask << attr);

            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
@@ -2931,9 +2945,6 @@ assign_attribute_or_color_locations(void *mem_ctx,
               double_storage_locations |= (use_mask << attr);
         }

-         assigned[assigned_attr] = var;
-         assigned_attr++;
-
         continue;
      }

--- a/src/compiler/glsl/shader_cache.cpp
+++ b/src/compiler/glsl/shader_cache.cpp
@@ -74,11 +74,26 @@ compile_shaders(struct gl_context *ctx, struct gl_shader_program *prog) {
   }
 }

+static void
+get_struct_type_field_and_pointer_sizes(size_t *s_field_size,
+                                        size_t *s_field_ptrs)
+{
+   *s_field_size = sizeof(glsl_struct_field);
+   *s_field_ptrs =
+     sizeof(((glsl_struct_field *)0)->type) +
+     sizeof(((glsl_struct_field *)0)->name);
+}
+
 static void
 encode_type_to_blob(struct blob *blob, const glsl_type *type)
 {
   uint32_t encoding;

+   if (!type) {
+      blob_write_uint32(blob, 0);
+      return;
+   }
+
   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
@@ -122,11 +137,18 @@ encode_type_to_blob(struct blob *blob, const glsl_type *type)
      blob_write_uint32(blob, (type->base_type) << 24);
      blob_write_string(blob, type->name);
      blob_write_uint32(blob, type->length);
-      blob_write_bytes(blob, type->fields.structure,
-                       sizeof(glsl_struct_field) * type->length);
+
+      size_t s_field_size, s_field_ptrs;
+      get_struct_type_field_and_pointer_sizes(&s_field_size, &s_field_ptrs);
+
      for (unsigned i = 0; i < type->length; i++) {
         encode_type_to_blob(blob, type->fields.structure[i].type);
         blob_write_string(blob, type->fields.structure[i].name);
+
+         /* Write the struct field skipping the pointers */
+         blob_write_bytes(blob,
+                          ((char *)&type->fields.structure[i]) + s_field_ptrs,
+                          s_field_size - s_field_ptrs);
      }

      if (type->is_interface()) {
@@ -149,6 +171,11 @@ static const glsl_type *
 decode_type_from_blob(struct blob_reader *blob)
 {
   uint32_t u = blob_read_uint32(blob);
+
+   if (u == 0) {
+      return NULL;
+   }
+
   glsl_base_type base_type = (glsl_base_type) (u >> 24);

   switch (base_type) {
@@ -182,22 +209,33 @@ decode_type_from_blob(struct blob_reader *blob)
   case GLSL_TYPE_INTERFACE: {
      char *name = blob_read_string(blob);
      unsigned num_fields = blob_read_uint32(blob);
-      glsl_struct_field *fields = (glsl_struct_field *)
-         blob_read_bytes(blob, sizeof(glsl_struct_field) * num_fields);
+
+      size_t s_field_size, s_field_ptrs;
+      get_struct_type_field_and_pointer_sizes(&s_field_size, &s_field_ptrs);
+
+      glsl_struct_field *fields =
+         (glsl_struct_field *) malloc(s_field_size * num_fields);
      for (unsigned i = 0; i < num_fields; i++) {
         fields[i].type = decode_type_from_blob(blob);
         fields[i].name = blob_read_string(blob);
+
+         blob_copy_bytes(blob, ((uint8_t *) &fields[i]) + s_field_ptrs,
+                         s_field_size - s_field_ptrs);
      }

+      const glsl_type *t;
      if (base_type == GLSL_TYPE_INTERFACE) {
         enum glsl_interface_packing packing =
            (glsl_interface_packing) blob_read_uint32(blob);
         bool row_major = blob_read_uint32(blob);
-         return glsl_type::get_interface_instance(fields, num_fields,
-                                                  packing, row_major, name);
+         t = glsl_type::get_interface_instance(fields, num_fields, packing,
+                                               row_major, name);
      } else {
-         return glsl_type::get_record_instance(fields, num_fields, name);
+         t = glsl_type::get_record_instance(fields, num_fields, name);
      }
+
+      free(fields);
+      return t;
   }
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
@@ -555,6 +593,17 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
                      MAX_FEEDBACK_BUFFERS);
 }

+static bool
+has_uniform_storage(struct gl_shader_program *prog, unsigned idx)
+{
+   if (!prog->data->UniformStorage[idx].builtin &&
+       !prog->data->UniformStorage[idx].is_shader_storage &&
+       prog->data->UniformStorage[idx].block_index == -1)
+      return true;
+
+   return false;
+}
+
 static void
 write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
 {
@@ -566,8 +615,6 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
      encode_type_to_blob(metadata, prog->data->UniformStorage[i].type);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].array_elements);
      blob_write_string(metadata, prog->data->UniformStorage[i].name);
-      blob_write_uint32(metadata, prog->data->UniformStorage[i].storage -
-                                  prog->data->UniformDataSlots);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].builtin);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].remap_location);
      blob_write_uint32(metadata, prog->data->UniformStorage[i].block_index);
@@ -586,6 +633,12 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
                        prog->data->UniformStorage[i].top_level_array_size);
      blob_write_uint32(metadata,
                        prog->data->UniformStorage[i].top_level_array_stride);
+
+     if (has_uniform_storage(prog, i)) {
+         blob_write_uint32(metadata, prog->data->UniformStorage[i].storage -
+                                     prog->data->UniformDataSlots);
+      }
+
      blob_write_bytes(metadata, prog->data->UniformStorage[i].opaque,
                       sizeof(prog->data->UniformStorage[i].opaque));
   }
@@ -597,9 +650,7 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
    */
   blob_write_uint32(metadata, prog->data->NumHiddenUniforms);
   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
-      if (!prog->data->UniformStorage[i].builtin &&
-          !prog->data->UniformStorage[i].is_shader_storage &&
-          prog->data->UniformStorage[i].block_index == -1) {
+      if (has_uniform_storage(prog, i)) {
         unsigned vec_size =
            prog->data->UniformStorage[i].type->component_slots() *
            MAX2(prog->data->UniformStorage[i].array_elements, 1);
@@ -633,7 +684,6 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
      uniforms[i].type = decode_type_from_blob(metadata);
      uniforms[i].array_elements = blob_read_uint32(metadata);
      uniforms[i].name = ralloc_strdup(prog, blob_read_string (metadata));
-      uniforms[i].storage = data + blob_read_uint32(metadata);
      uniforms[i].builtin = blob_read_uint32(metadata);
      uniforms[i].remap_location = blob_read_uint32(metadata);
      uniforms[i].block_index = blob_read_uint32(metadata);
@@ -651,6 +701,10 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
      uniforms[i].top_level_array_stride = blob_read_uint32(metadata);
      prog->UniformHash->put(i, uniforms[i].name);

+      if (has_uniform_storage(prog, i)) {
+         uniforms[i].storage = data + blob_read_uint32(metadata);
+      }
+
      memcpy(uniforms[i].opaque,
             blob_read_bytes(metadata, sizeof(uniforms[i].opaque)),
             sizeof(uniforms[i].opaque));
@@ -659,9 +713,7 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
   /* Restore uniform values. */
   prog->data->NumHiddenUniforms = blob_read_uint32(metadata);
   for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
-      if (!prog->data->UniformStorage[i].builtin &&
-          !prog->data->UniformStorage[i].is_shader_storage &&
-          prog->data->UniformStorage[i].block_index == -1) {
+      if (has_uniform_storage(prog, i)) {
         unsigned vec_size =
            prog->data->UniformStorage[i].type->component_slots() *
            MAX2(prog->data->UniformStorage[i].array_elements, 1);
@@ -867,6 +919,18 @@ write_shader_subroutine_index(struct blob *metadata,
   }
 }

+static void
+get_shader_var_and_pointer_sizes(size_t *s_var_size, size_t *s_var_ptrs,
+                                 const gl_shader_variable *var)
+{
+   *s_var_size = sizeof(gl_shader_variable);
+   *s_var_ptrs =
+      sizeof(var->type) +
+      sizeof(var->interface_type) +
+      sizeof(var->outermost_struct_type) +
+      sizeof(var->name);
+}
+
 static void
 write_program_resource_data(struct blob *metadata,
                            struct gl_shader_program *prog,
@@ -878,16 +942,19 @@ write_program_resource_data(struct blob *metadata,
   case GL_PROGRAM_INPUT:
   case GL_PROGRAM_OUTPUT: {
      const gl_shader_variable *var = (gl_shader_variable *)res->Data;
-      blob_write_bytes(metadata, var, sizeof(gl_shader_variable));
+
      encode_type_to_blob(metadata, var->type);
-
-      if (var->interface_type)
-         encode_type_to_blob(metadata, var->interface_type);
-
-      if (var->outermost_struct_type)
-         encode_type_to_blob(metadata, var->outermost_struct_type);
+      encode_type_to_blob(metadata, var->interface_type);
+      encode_type_to_blob(metadata, var->outermost_struct_type);

      blob_write_string(metadata, var->name);
+
+      size_t s_var_size, s_var_ptrs;
+      get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var);
+
+      /* Write gl_shader_variable skipping over the pointers */
+      blob_write_bytes(metadata, ((char *)var) + s_var_ptrs,
+                       s_var_size - s_var_ptrs);
      break;
   }
   case GL_UNIFORM_BLOCK:
@@ -978,17 +1045,18 @@ read_program_resource_data(struct blob_reader *metadata,
   case GL_PROGRAM_OUTPUT: {
      gl_shader_variable *var = ralloc(prog, struct gl_shader_variable);

-      blob_copy_bytes(metadata, (uint8_t *) var, sizeof(gl_shader_variable));
      var->type = decode_type_from_blob(metadata);
-
-      if (var->interface_type)
-         var->interface_type = decode_type_from_blob(metadata);
-
-      if (var->outermost_struct_type)
-         var->outermost_struct_type = decode_type_from_blob(metadata);
+      var->interface_type = decode_type_from_blob(metadata);
+      var->outermost_struct_type = decode_type_from_blob(metadata);

      var->name = ralloc_strdup(prog, blob_read_string(metadata));

+      size_t s_var_size, s_var_ptrs;
+      get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var);
+
+      blob_copy_bytes(metadata, ((uint8_t *) var) + s_var_ptrs,
+                      s_var_size - s_var_ptrs);
+
      res->Data = var;
      break;
   }
@@ -1148,18 +1216,20 @@ write_shader_metadata(struct blob *metadata, gl_linked_shader *shader)
   blob_write_bytes(metadata, glprog->sh.ImageUnits,
                    sizeof(glprog->sh.ImageUnits));

+   size_t ptr_size = sizeof(GLvoid *);
+
   blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers);
   blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler);
   for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
      blob_write_bytes(metadata, &glprog->sh.BindlessSamplers[i],
-                       sizeof(struct gl_bindless_sampler));
+                       sizeof(struct gl_bindless_sampler) - ptr_size);
   }

   blob_write_uint32(metadata, glprog->sh.NumBindlessImages);
   blob_write_uint32(metadata, glprog->sh.HasBoundBindlessImage);
   for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
      blob_write_bytes(metadata, &glprog->sh.BindlessImages[i],
-                       sizeof(struct gl_bindless_image));
+                       sizeof(struct gl_bindless_image) - ptr_size);
   }

   write_shader_parameters(metadata, glprog->Parameters);
@@ -1187,6 +1257,8 @@ read_shader_metadata(struct blob_reader *metadata,
   blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits,
                   sizeof(glprog->sh.ImageUnits));

+   size_t ptr_size = sizeof(GLvoid *);
+
   glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata);
   glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata);
   if (glprog->sh.NumBindlessSamplers > 0) {
@@ -1196,7 +1268,7 @@ read_shader_metadata(struct blob_reader *metadata,

      for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
         blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessSamplers[i],
-                         sizeof(struct gl_bindless_sampler));
+                         sizeof(struct gl_bindless_sampler) - ptr_size);
      }
   }

@@ -1209,7 +1281,7 @@ read_shader_metadata(struct blob_reader *metadata,

      for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
         blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessImages[i],
-                        sizeof(struct gl_bindless_image));
+                        sizeof(struct gl_bindless_image) - ptr_size);
      }
   }

@@ -1224,6 +1296,14 @@ create_binding_str(const char *key, unsigned value, void *closure)
   ralloc_asprintf_append(bindings_str, "%s:%u,", key, value);
 }

+static void
+get_shader_info_and_pointer_sizes(size_t *s_info_size, size_t *s_info_ptrs,
+                                  shader_info *info)
+{
+   *s_info_size = sizeof(shader_info);
+   *s_info_ptrs = sizeof(info->name) + sizeof(info->label);
+}
+
 static void
 create_linked_shader_and_program(struct gl_context *ctx,
                                 gl_shader_stage stage,
@@ -1242,12 +1322,16 @@ create_linked_shader_and_program(struct gl_context *ctx,

   read_shader_metadata(metadata, glprog, linked);

+   glprog->info.name = ralloc_strdup(glprog, blob_read_string(metadata));
+   glprog->info.label = ralloc_strdup(glprog, blob_read_string(metadata));
+
+   size_t s_info_size, s_info_ptrs;
+   get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
+                                     &glprog->info);
+
   /* Restore shader info */
-   blob_copy_bytes(metadata, (uint8_t *) &glprog->info, sizeof(shader_info));
-   if (glprog->info.name)
-      glprog->info.name = ralloc_strdup(glprog, blob_read_string(metadata));
-   if (glprog->info.label)
-      glprog->info.label = ralloc_strdup(glprog, blob_read_string(metadata));
+   blob_copy_bytes(metadata, ((uint8_t *) &glprog->info) + s_info_ptrs,
+                   s_info_size - s_info_ptrs);

   _mesa_reference_shader_program_data(ctx, &glprog->sh.data, prog->data);
   _mesa_reference_program(ctx, &linked->Program, glprog);
@@ -1286,14 +1370,24 @@ shader_cache_write_program_metadata(struct gl_context *ctx,
      if (sh) {
         write_shader_metadata(metadata, sh);

-         /* Store nir shader info */
-         blob_write_bytes(metadata, &sh->Program->info, sizeof(shader_info));
-
         if (sh->Program->info.name)
            blob_write_string(metadata, sh->Program->info.name);
+         else
+            blob_write_string(metadata, "");

         if (sh->Program->info.label)
            blob_write_string(metadata, sh->Program->info.label);
+         else
+            blob_write_string(metadata, "");
+
+         size_t s_info_size, s_info_ptrs;
+         get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
+                                           &sh->Program->info);
+
+         /* Store shader info */
+         blob_write_bytes(metadata,
+                          ((char *) &sh->Program->info) + s_info_ptrs,
+                          s_info_size - s_info_ptrs);
      }
   }

--- a/src/compiler/shader_info.h
+++ b/src/compiler/shader_info.h
@@ -32,14 +32,14 @@ extern "C" {
 #endif

 typedef struct shader_info {
-   /** The shader stage, such as MESA_SHADER_VERTEX. */
-   gl_shader_stage stage;
-
   const char *name;

   /* Descriptive name provided by the client; may be NULL */
   const char *label;

+   /** The shader stage, such as MESA_SHADER_VERTEX. */
+   gl_shader_stage stage;
+
   /* Number of textures used by this shader */
   unsigned num_textures;
   /* Number of uniform buffers used by this shader */
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -1121,6 +1121,10 @@ vtn_get_builtin_location(struct vtn_builder *b,
      *location = FRAG_RESULT_DEPTH;
      assert(*mode == nir_var_shader_out);
      break;
+   case SpvBuiltInHelperInvocation:
+      *location = SYSTEM_VALUE_HELPER_INVOCATION;
+      set_mode_system_value(mode);
+      break;
   case SpvBuiltInNumWorkgroups:
      *location = SYSTEM_VALUE_NUM_WORK_GROUPS;
      set_mode_system_value(mode);
@@ -1161,7 +1165,6 @@ vtn_get_builtin_location(struct vtn_builder *b,
      *location = SYSTEM_VALUE_VIEW_INDEX;
      set_mode_system_value(mode);
      break;
-   case SpvBuiltInHelperInvocation:
   default:
      unreachable("unsupported builtin");
   }
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -629,6 +629,18 @@ dri2_setup_screen(_EGLDisplay *disp)
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   unsigned int api_mask;

+   /*
+    * The EGL 1.5 specification defines the default swap interval as 1.
+    * Moreover, eglSwapInterval() is required to clamp the requested value to
+    * the supported range. Since the default value is implicitly assumed to be
+    * supported, use it as both minimum and maximum for the platforms that do
+    * not allow changing the interval. Platforms that allow it (e.g. x11,
+    * wayland) override these values already.
+    */
+   dri2_dpy->min_swap_interval = 1;
+   dri2_dpy->max_swap_interval = 1;
+   dri2_dpy->default_swap_interval = 1;
+
   if (dri2_dpy->image_driver) {
      api_mask = dri2_dpy->image_driver->getAPIMask(dri2_dpy->dri_screen);
   } else if (dri2_dpy->dri2) {
@@ -945,9 +957,12 @@ dri2_display_destroy(_EGLDisplay *disp)
          zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf);
      if (dri2_dpy->wl_shm)
          wl_shm_destroy(dri2_dpy->wl_shm);
-      wl_registry_destroy(dri2_dpy->wl_registry);
-      wl_event_queue_destroy(dri2_dpy->wl_queue);
-      wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
+      if (dri2_dpy->wl_registry)
+         wl_registry_destroy(dri2_dpy->wl_registry);
+      if (dri2_dpy->wl_queue)
+         wl_event_queue_destroy(dri2_dpy->wl_queue);
+      if (dri2_dpy->wl_dpy_wrapper)
+         wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
      u_vector_finish(&dri2_dpy->wl_modifiers.argb8888);
      u_vector_finish(&dri2_dpy->wl_modifiers.xrgb8888);
      u_vector_finish(&dri2_dpy->wl_modifiers.rgb565);
--- a/src/egl/drivers/dri2/egl_dri2_fallbacks.h
+++ b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
@@ -59,7 +59,14 @@ static inline EGLBoolean
 dri2_fallback_swap_interval(_EGLDriver *drv, _EGLDisplay *dpy,
                            _EGLSurface *surf, EGLint interval)
 {
-   return EGL_FALSE;
+   if (interval > surf->Config->MaxSwapInterval)
+      interval = surf->Config->MaxSwapInterval;
+   else if (interval < surf->Config->MinSwapInterval)
+      interval = surf->Config->MinSwapInterval;
+
+   surf->SwapInterval = interval;
+
+   return EGL_TRUE;
 }

 static inline EGLBoolean
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1278,6 +1278,7 @@ dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
    */
   dri2_dpy->min_swap_interval = 0;
   dri2_dpy->max_swap_interval = 0;
+   dri2_dpy->default_swap_interval = 0;

   if (!dri2_dpy->swap_available)
      return;
@@ -1319,6 +1320,7 @@ static const __DRIextension *dri3_image_loader_extensions[] = {
   &dri3_image_loader_extension.base,
   &image_lookup_extension.base,
   &use_invalidate.base,
+   &background_callable_extension.base,
   NULL,
 };

--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
         for (i = 0; i < format_desc->nr_channels; i++) {
            struct util_format_channel_description chan_desc = format_desc->channel[i];
            unsigned blockbits = type.width;
-            unsigned vec_nr = chan_desc.shift / type.width;
+            unsigned vec_nr;
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+            vec_nr = (format_desc->block.bits - (chan_desc.shift + chan_desc.size)) / type.width;
+#else
+            vec_nr = chan_desc.shift / type.width;
+#endif
            chan_desc.shift %= type.width;

            output[i] = lp_build_extract_soa_chan(&bld,
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -234,13 +234,39 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm,
          */
         res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");

-         if (vector_justify) {
 #ifdef PIPE_ARCH_BIG_ENDIAN
+         if (vector_justify) {
         res = LLVMBuildShl(gallivm->builder, res,
                            LLVMConstInt(dst_elem_type,
                                         dst_type.width - src_width, 0), "");
-#endif
         }
+         if (src_width == 48) {
+            /* Load 3x16 bit vector.
+             * The sequence of loads on big-endian hardware proceeds as follows.
+             * 16-bit fields are denoted by X, Y, Z, and 0.  In memory, the sequence
+             * of three fields appears in the order X, Y, Z.
+             *
+             * Load 32-bit word: 0.0.X.Y
+             * Load 16-bit halfword: 0.0.0.Z
+             * Rotate left: 0.X.Y.0
+             * Bitwise OR: 0.X.Y.Z
+             *
+             * The order in which we need the fields in the result is 0.Z.Y.X,
+             * the same as on little-endian; permute 16-bit fields accordingly
+             * within 64-bit register:
+             */
+            LLVMValueRef shuffles[4] = {
+               lp_build_const_int32(gallivm, 2),
+               lp_build_const_int32(gallivm, 1),
+               lp_build_const_int32(gallivm, 0),
+               lp_build_const_int32(gallivm, 3),
+            };
+            res = LLVMBuildBitCast(gallivm->builder, res,
+                                   lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
+            res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
+            res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
+         }
+#endif
      }
   }
   return res;
--- a/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_clear_blit.c
@@ -106,7 +106,7 @@ pack_rgba(enum pipe_format format, const float *rgba)
   union util_color uc;
   util_pack_color(rgba, format, &uc);
   if (util_format_get_blocksize(format) == 2)
-      return uc.ui[0] << 16 | uc.ui[0];
+      return uc.ui[0] << 16 | (uc.ui[0] & 0xffff);
   else
      return uc.ui[0];
 }
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -428,13 +428,7 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
 	struct fd_batch *batch, *save_batch = NULL;
 	unsigned i;

-	/* TODO maybe we don't want to allocate and flush a batch each time?
-	 * We could use a special bogus (ie. won't match any fb state) key
-	 * in the batch-case for compute shaders, and rely on the rest of
-	 * the dependency tracking mechanism to tell us when the compute
-	 * batch needs to be flushed?
-	 */
-	batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx);
+	batch = fd_batch_create(ctx);
 	fd_batch_reference(&save_batch, ctx->batch);
 	fd_batch_reference(&ctx->batch, batch);

--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -227,6 +227,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,

   lp_reset_counters();

+   /* If llvmpipe_set_scissor_states() is never called, we still need to
+    * make sure that derived scissor state is computed.
+    * See https://bugs.freedesktop.org/show_bug.cgi?id=101709
+    */
+   llvmpipe->dirty |= LP_NEW_SCISSOR;
+
   return &llvmpipe->pipe;

 fail:
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -346,6 +346,7 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
   case PIPE_QUERY_PIPELINE_STATISTICS:
      for (i = 0; i < 10; ++i)
         res64[i] = data64[i * 2] - data64[24 + i * 2];
+      result->pipeline_statistics.cs_invocations = 0;
      break;
   case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
      res32[0] = hq->data[1];
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -557,6 +557,7 @@ struct r600_common_context {
 	unsigned			gpu_reset_counter;
 	unsigned			last_dirty_tex_counter;
 	unsigned			last_compressed_colortex_counter;
+	unsigned			last_num_draw_calls;

 	struct threaded_context		*tc;
 	struct u_suballocator		*allocator_zeroed_memory;
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -781,6 +781,11 @@ static void si_launch_grid(
 	    program->shader.compilation_failed)
 		return;

+	if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) {
+		si_update_fb_dirtiness_after_rendering(sctx);
+		sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
+	}
+
 	si_decompress_compute_textures(sctx);

 	/* Add buffer sizes for memory checking in need_cs_space. */
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -59,7 +59,8 @@
 #define SI_CONTEXT_WRITEBACK_GLOBAL_L2	(R600_CONTEXT_PRIVATE_FLAG << 4)
 /* gaps */
 /* Framebuffer caches. */
-#define SI_CONTEXT_FLUSH_AND_INV_DB	(R600_CONTEXT_PRIVATE_FLAG << 7)
+#define SI_CONTEXT_FLUSH_AND_INV_DB	(R600_CONTEXT_PRIVATE_FLAG << 6)
+#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 7)
 #define SI_CONTEXT_FLUSH_AND_INV_CB	(R600_CONTEXT_PRIVATE_FLAG << 8)
 /* Engine synchronization. */
 #define SI_CONTEXT_VS_PARTIAL_FLUSH	(R600_CONTEXT_PRIVATE_FLAG << 9)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -83,10 +83,10 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);

-/* Ideally pass the sample mask input to the PS epilog as v13, which
+/* Ideally pass the sample mask input to the PS epilog as v14, which
 * is its usual location, so that the shader doesn't have to add v_mov.
 */
-#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
+#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14

 enum {
 	CONST_ADDR_SPACE = 2,
@@ -174,20 +174,6 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 	}
 }

-/**
- * Helper function that builds an LLVM IR PHI node and immediately adds
- * incoming edges.
- */
-static LLVMValueRef
-build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
-	  unsigned count_incoming, LLVMValueRef *values,
-	  LLVMBasicBlockRef *blocks)
-{
-	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
-	LLVMAddIncoming(phi, values, blocks, count_incoming);
-	return phi;
-}
-
 /**
 * Get the value of a shader input parameter and extract a bitfield.
 */
@@ -2732,15 +2718,15 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)

 		values[0] = rel_patch_id;
 		values[1] = LLVMGetUndef(ctx->i32);
-		rel_patch_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+		rel_patch_id = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);

 		values[0] = tf_lds_offset;
 		values[1] = LLVMGetUndef(ctx->i32);
-		tf_lds_offset = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+		tf_lds_offset = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);

 		values[0] = invocation_id;
 		values[1] = ctx->i32_1; /* cause the epilog to skip threads */
-		invocation_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+		invocation_id = ac_build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
 	}

 	/* Return epilog parameters from this function. */
@@ -4420,6 +4406,7 @@ static void create_function(struct si_shader_context *ctx)
 		params[SI_PARAM_FRONT_FACE] = ctx->i32;
 		shader->info.face_vgpr_index = 20;
 		params[SI_PARAM_ANCILLARY] = ctx->i32;
+		shader->info.ancillary_vgpr_index = 21;
 		params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
 		params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
 		num_params = SI_PARAM_POS_FIXED_PT+1;
@@ -4495,6 +4482,7 @@ static void create_function(struct si_shader_context *ctx)
 				      S_0286D0_LINEAR_CENTER_ENA(1) |
 				      S_0286D0_LINEAR_CENTROID_ENA(1) |
 				      S_0286D0_FRONT_FACE_ENA(1) |
+				      S_0286D0_ANCILLARY_ENA(1) |
 				      S_0286D0_POS_FIXED_PT_ENA(1));
 	}

@@ -5649,6 +5637,7 @@ static void si_get_ps_prolog_key(struct si_shader *shader,
 		 key->ps_prolog.states.force_linear_center_interp ||
 		 key->ps_prolog.states.bc_optimize_for_persp ||
 		 key->ps_prolog.states.bc_optimize_for_linear);
+	key->ps_prolog.ancillary_vgpr_index = shader->info.ancillary_vgpr_index;

 	if (info->colors_read) {
 		unsigned *color = shader->selector->color_attr_index;
@@ -5758,7 +5747,8 @@ static bool si_need_ps_prolog(const union si_shader_part_key *key)
 	       key->ps_prolog.states.force_linear_center_interp ||
 	       key->ps_prolog.states.bc_optimize_for_persp ||
 	       key->ps_prolog.states.bc_optimize_for_linear ||
-	       key->ps_prolog.states.poly_stipple;
+	       key->ps_prolog.states.poly_stipple ||
+	       key->ps_prolog.states.samplemask_log_ps_iter;
 }

 /**
@@ -6405,6 +6395,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	if (ctx.type == PIPE_SHADER_FRAGMENT) {
 		shader->info.num_input_vgprs = 0;
 		shader->info.face_vgpr_index = -1;
+		shader->info.ancillary_vgpr_index = -1;

 		if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
 			shader->info.num_input_vgprs += 2;
@@ -6434,8 +6425,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 			shader->info.face_vgpr_index = shader->info.num_input_vgprs;
 			shader->info.num_input_vgprs += 1;
 		}
-		if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
+		if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) {
+			shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs;
 			shader->info.num_input_vgprs += 1;
+		}
 		if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
 			shader->info.num_input_vgprs += 1;
 		if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
@@ -7079,6 +7072,54 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
 		}
 	}

+	/* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
+	 * says:
+	 *
+	 *    "When per-sample shading is active due to the use of a fragment
+	 *     input qualified by sample or due to the use of the gl_SampleID
+	 *     or gl_SamplePosition variables, only the bit for the current
+	 *     sample is set in gl_SampleMaskIn. When state specifies multiple
+	 *     fragment shader invocations for a given fragment, the sample
+	 *     mask for any single fragment shader invocation may specify a
+	 *     subset of the covered samples for the fragment. In this case,
+	 *     the bit corresponding to each covered sample will be set in
+	 *     exactly one fragment shader invocation."
+	 *
+	 * The samplemask loaded by hardware is always the coverage of the
+	 * entire pixel/fragment, so mask bits out based on the sample ID.
+	 */
+	if (key->ps_prolog.states.samplemask_log_ps_iter) {
+		/* The bit pattern matches that used by fixed function fragment
+		 * processing. */
+		static const uint16_t ps_iter_masks[] = {
+			0xffff, /* not used */
+			0x5555,
+			0x1111,
+			0x0101,
+			0x0001,
+		};
+		assert(key->ps_prolog.states.samplemask_log_ps_iter < ARRAY_SIZE(ps_iter_masks));
+
+		uint32_t ps_iter_mask = ps_iter_masks[key->ps_prolog.states.samplemask_log_ps_iter];
+		unsigned ancillary_vgpr = key->ps_prolog.num_input_sgprs +
+					  key->ps_prolog.ancillary_vgpr_index;
+		LLVMValueRef sampleid = unpack_param(ctx, ancillary_vgpr, 8, 4);
+		LLVMValueRef samplemask = LLVMGetParam(func, ancillary_vgpr + 1);
+
+		samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->i32, "");
+		samplemask = LLVMBuildAnd(
+			gallivm->builder,
+			samplemask,
+			LLVMBuildShl(gallivm->builder,
+				     LLVMConstInt(ctx->i32, ps_iter_mask, false),
+				     sampleid, ""),
+			"");
+		samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->f32, "");
+
+		ret = LLVMBuildInsertValue(gallivm->builder, ret, samplemask,
+					   ancillary_vgpr + 1, "");
+	}
+
 	/* Tell LLVM to insert WQM instruction sequence when needed. */
 	if (key->ps_prolog.wqm) {
 		LLVMAddTargetDependentFunctionAttr(func,
@@ -7276,6 +7317,12 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 		assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
 	}

+	/* Samplemask fixup requires the sample ID. */
+	if (shader->key.part.ps.prolog.samplemask_log_ps_iter) {
+		shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
+		assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr));
+	}
+
 	/* The sample mask input is always enabled, because the API shader always
 	 * passes it through to the epilog. Disable it here if it's unused.
 	 */
@@ -7353,6 +7400,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
 		shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
 		shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
+		shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index;
 		memcpy(shader->info.vs_output_param_offset,
 		       mainp->info.vs_output_param_offset,
 		       sizeof(mainp->info.vs_output_param_offset));
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -416,6 +416,7 @@ struct si_ps_prolog_bits {
 	unsigned	force_linear_center_interp:1;
 	unsigned	bc_optimize_for_persp:1;
 	unsigned	bc_optimize_for_linear:1;
+	unsigned	samplemask_log_ps_iter:3;
 };

 /* Common PS bits between the shader key and the epilog key. */
@@ -457,6 +458,7 @@ union si_shader_part_key {
 		unsigned	colors_read:8; /* color input components read */
 		unsigned	num_interp_inputs:5; /* BCOLOR is at this location */
 		unsigned	face_vgpr_index:5;
+		unsigned	ancillary_vgpr_index:5;
 		unsigned	wqm:1;
 		char		color_attr_index[2];
 		char		color_interp_vgpr_index[2]; /* -1 == constant */
@@ -549,7 +551,8 @@ struct si_shader_info {
 	ubyte			vs_output_param_offset[SI_MAX_VS_OUTPUTS];
 	ubyte			num_input_sgprs;
 	ubyte			num_input_vgprs;
-	char			face_vgpr_index;
+	signed char		face_vgpr_index;
+	signed char		ancillary_vgpr_index;
 	bool			uses_instanceid;
 	ubyte			nr_pos_exports;
 	ubyte			nr_param_exports;
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1470,12 +1470,22 @@ static void tex_fetch_args(
 	if (target == TGSI_TEXTURE_CUBE ||
 	    target == TGSI_TEXTURE_CUBE_ARRAY ||
 	    target == TGSI_TEXTURE_SHADOWCUBE ||
-	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
+	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 		ac_prepare_cube_coords(&ctx->ac,
 				       opcode == TGSI_OPCODE_TXD,
 				       target == TGSI_TEXTURE_CUBE_ARRAY ||
 				       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
+				       opcode == TGSI_OPCODE_LODQ,
 				       coords, derivs);
+	} else if (tgsi_is_array_sampler(target) &&
+		   opcode != TGSI_OPCODE_TXF &&
+		   opcode != TGSI_OPCODE_TXF_LZ &&
+		   ctx->screen->b.chip_class <= VI) {
+		unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
+		coords[array_coord] =
+			ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32,
+					   &coords[array_coord], 1, 0);
+	}

 	if (opcode == TGSI_OPCODE_TXD)
 		for (int i = 0; i < num_deriv_channels * 2; i++)
@@ -1677,12 +1687,21 @@ static void tex_fetch_args(
 *
 * The workaround is to subtract 0.5 from the unnormalized coordinates,
 * or (0.5 / size) from the normalized coordinates.
+ *
+ * However, cube textures with 8_8_8_8 data formats require a different
+ * workaround of overriding the num format to USCALED/SSCALED. This would lose
+ * precision in 32-bit data formats, so it needs to be applied dynamically at
+ * runtime. In this case, return an i1 value that indicates whether the
+ * descriptor was overridden (and hence a fixup of the sampler result is needed).
 */
-static void si_lower_gather4_integer(struct si_shader_context *ctx,
-				     struct ac_image_args *args,
-				     unsigned target)
+static LLVMValueRef
+si_lower_gather4_integer(struct si_shader_context *ctx,
+			 struct ac_image_args *args,
+			 unsigned target,
+			 enum tgsi_return_type return_type)
 {
 	LLVMBuilderRef builder = ctx->gallivm.builder;
+	LLVMValueRef wa_8888 = NULL;
 	LLVMValueRef coord = args->addr;
 	LLVMValueRef half_texel[2];
 	/* Texture coordinates start after:
@@ -1692,12 +1711,54 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
 	unsigned coord_vgpr_index = (int)args->offset + (int)args->compare;
 	int c;

+	assert(return_type == TGSI_RETURN_TYPE_SINT ||
+	       return_type == TGSI_RETURN_TYPE_UINT);
+
+	if (target == TGSI_TEXTURE_CUBE ||
+	    target == TGSI_TEXTURE_CUBE_ARRAY) {
+		LLVMValueRef formats;
+		LLVMValueRef data_format;
+		LLVMValueRef wa_formats;
+
+		formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
+
+		data_format = LLVMBuildLShr(builder, formats,
+					    LLVMConstInt(ctx->i32, 20, false), "");
+		data_format = LLVMBuildAnd(builder, data_format,
+					   LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
+		wa_8888 = LLVMBuildICmp(
+			builder, LLVMIntEQ, data_format,
+			LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
+			"");
+
+		uint32_t wa_num_format =
+			return_type == TGSI_RETURN_TYPE_UINT ?
+			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) :
+			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED);
+		wa_formats = LLVMBuildAnd(builder, formats,
+					  LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false),
+					  "");
+		wa_formats = LLVMBuildOr(builder, wa_formats,
+					LLVMConstInt(ctx->i32, wa_num_format, false), "");
+
+		formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
+		args->resource = LLVMBuildInsertElement(
+			builder, args->resource, formats, ctx->i32_1, "");
+	}
+
 	if (target == TGSI_TEXTURE_RECT ||
 	    target == TGSI_TEXTURE_SHADOWRECT) {
+		assert(!wa_8888);
 		half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
 	} else {
 		struct tgsi_full_instruction txq_inst = {};
 		struct lp_build_emit_data txq_emit_data = {};
+		struct lp_build_if_state if_ctx;
+
+		if (wa_8888) {
+			/* Skip the texture size query entirely if we don't need it. */
+			lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
+		}

 		/* Query the texture size. */
 		txq_inst.Texture.Texture = target;
@@ -1720,6 +1781,18 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
 			half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
 						      LLVMConstReal(ctx->f32, -0.5), "");
 		}
+
+		if (wa_8888) {
+			lp_build_endif(&if_ctx);
+
+			LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
+
+			for (c = 0; c < 2; c++) {
+				LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
+				half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
+							     values, bb);
+			}
+		}
 	}

 	for (c = 0; c < 2; c++) {
@@ -1734,6 +1807,42 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
 	}

 	args->addr = coord;
+
+	return wa_8888;
+}
+
+/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
+ * result after the gather operation.
+ */
+static LLVMValueRef
+si_fix_gather4_integer_result(struct si_shader_context *ctx,
+			   LLVMValueRef result,
+			   enum tgsi_return_type return_type,
+			   LLVMValueRef wa)
+{
+	LLVMBuilderRef builder = ctx->gallivm.builder;
+
+	assert(return_type == TGSI_RETURN_TYPE_SINT ||
+	       return_type == TGSI_RETURN_TYPE_UINT);
+
+	for (unsigned chan = 0; chan < 4; ++chan) {
+		LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
+		LLVMValueRef value;
+		LLVMValueRef wa_value;
+
+		value = LLVMBuildExtractElement(builder, result, chanv, "");
+
+		if (return_type == TGSI_RETURN_TYPE_UINT)
+			wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
+		else
+			wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
+		wa_value = LLVMBuildBitCast(builder, wa_value, ctx->f32, "");
+		value = LLVMBuildSelect(builder, wa, wa_value, value, "");
+
+		result = LLVMBuildInsertElement(builder, result, value, chanv, "");
+	}
+
+	return result;
 }

 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
@@ -1808,17 +1917,30 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 	}

 	/* The hardware needs special lowering for Gather4 with integer formats. */
+	LLVMValueRef gather4_int_result_workaround = NULL;
+
 	if (ctx->screen->b.chip_class <= VI &&
 	    opcode == TGSI_OPCODE_TG4) {
 		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);

 		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
-		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT)
-			si_lower_gather4_integer(ctx, &args, target);
+		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
+			gather4_int_result_workaround =
+				si_lower_gather4_integer(ctx, &args, target,
+							 inst->Texture.ReturnType);
+		}
 	}

-	emit_data->output[emit_data->chan] =
+	LLVMValueRef result =
 		ac_build_image_opcode(&ctx->ac, &args);
+
+	if (gather4_int_result_workaround) {
+		result = si_fix_gather4_integer_result(ctx, result,
+						       inst->Texture.ReturnType,
+						       gather4_int_result_workaround);
+	}
+
+	emit_data->output[emit_data->chan] = result;
 }

 static void si_llvm_emit_txqs(
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1193,7 +1193,7 @@ void si_llvm_context_init(struct si_shader_context *ctx,
 	ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
 						 float_mode);

-	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
+	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context, sscreen->b.chip_class);
 	ctx->ac.module = ctx->gallivm.module;
 	ctx->ac.builder = ctx->gallivm.builder;

--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2579,6 +2579,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
 				 SI_CONTEXT_INV_GLOBAL_L2 |
 				 SI_CONTEXT_FLUSH_AND_INV_DB;
+	} else if (sctx->b.chip_class == GFX9) {
+		/* It appears that DB metadata "leaks" in a sequence of:
+		 *  - depth clear
+		 *  - DCC decompress for shader image writes (with DB disabled)
+		 *  - render with DEPTH_BEFORE_SHADER=1
+		 * Flushing DB metadata works around the problem.
+		 */
+		sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
 	}

 	/* Take the maximum of the old and new count. If the new count is lower,
@@ -3701,7 +3709,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 			  S_008F38_XY_MAG_FILTER(eg_tex_filter(state->mag_img_filter, max_aniso)) |
 			  S_008F38_XY_MIN_FILTER(eg_tex_filter(state->min_img_filter, max_aniso)) |
 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)) |
-			  S_008F38_MIP_POINT_PRECLAMP(1) |
+			  S_008F38_MIP_POINT_PRECLAMP(0) |
 			  S_008F38_DISABLE_LSB_CEIL(sctx->b.chip_class <= VI) |
 			  S_008F38_FILTER_PREC_FIX(1) |
 			  S_008F38_ANISO_OVERRIDE(sctx->b.chip_class >= VI));
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -910,7 +910,8 @@ void si_emit_cache_flush(struct si_context *sctx)
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
 	}
-	if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
+	if (rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_DB |
+			   SI_CONTEXT_FLUSH_AND_INV_DB_META)) {
 		/* Flush HTILE. SURFACE_SYNC will wait for idle. */
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1412,6 +1412,12 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 							     sctx->framebuffer.nr_samples <= 1;
 			key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;

+			if (sctx->ps_iter_samples > 1 &&
+			    sel->info.reads_samplemask) {
+				key->part.ps.prolog.samplemask_log_ps_iter =
+					util_logbase2(util_next_power_of_two(sctx->ps_iter_samples));
+			}
+
 			if (rs->force_persample_interp &&
 			    rs->multisample_enable &&
 			    sctx->framebuffer.nr_samples > 1 &&
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -139,6 +139,14 @@ def parse_ir_builder(input_file):

                    ignore = False

+                    # The following functions need to be ignored in openswr.
+                    # API change in llvm-5.0 breaks baked autogen files
+                    if (
+                        (func_name == 'CreateFence' or
+                         func_name == 'CreateAtomicCmpXchg' or
+                         func_name == 'CreateAtomicRMW')):
+                        ignore = True
+
                    # The following functions need to be ignored.
                    if (func_name == 'CreateInsertNUWNSWBinOp' or
                        func_name == 'CreateMaskedIntrinsic' or
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -123,6 +123,8 @@ vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
 struct vc4_bo *
 vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
 {
+        bool cleared_and_retried = false;
+        struct drm_vc4_create_bo create;
        struct vc4_bo *bo;
        int ret;

@@ -149,12 +151,8 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
        bo->private = true;

 retry:
-        ;
-
-        bool cleared_and_retried = false;
-        struct drm_vc4_create_bo create = {
-                .size = size
-        };
+        memset(&create, 0, sizeof(create));
+        create.size = size;

        ret = vc4_ioctl(screen->fd, DRM_IOCTL_VC4_CREATE_BO, &create);
        bo->handle = create.handle;
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -84,6 +84,13 @@ struct vc4_sampler_view {
        uint32_t texture_p0;
        uint32_t texture_p1;
        bool force_first_level;
+        /**
+         * Resource containing the actual texture that will be sampled.
+         *
+         * We may need to rebase the .base.texture resource to work around the
+         * lack of GL_TEXTURE_BASE_LEVEL, or to upload the texture as tiled.
+         */
+        struct pipe_resource *texture;
 };

 struct vc4_sampler_state {
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -116,12 +116,13 @@ vc4_predraw_check_textures(struct pipe_context *pctx,
        struct vc4_context *vc4 = vc4_context(pctx);

        for (int i = 0; i < stage_tex->num_textures; i++) {
-                struct pipe_sampler_view *view = stage_tex->textures[i];
+                struct vc4_sampler_view *view =
+                        vc4_sampler_view(stage_tex->textures[i]);
                if (!view)
                        continue;
-                struct vc4_resource *rsc = vc4_resource(view->texture);
-                if (rsc->shadow_parent)
-                        vc4_update_shadow_baselevel_texture(pctx, view);
+
+                if (view->texture != view->base.texture)
+                        vc4_update_shadow_baselevel_texture(pctx, &view->base);

                vc4_flush_jobs_writing_resource(vc4, view->texture);
        }
@@ -502,6 +503,37 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);

+        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+                struct vc4_resource *rsc =
+                        vc4_resource(vc4->framebuffer.zsbuf->texture);
+                unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
+
+                /* Clearing ZS will clear both Z and stencil, so if we're
+                 * trying to clear just one then we need to draw a quad to do
+                 * it instead.  We need to do this before setting up
+                 * tile-based clears in vc4->job, because the blitter may
+                 * submit the current job.
+                 */
+                if ((zsclear == PIPE_CLEAR_DEPTH ||
+                     zsclear == PIPE_CLEAR_STENCIL) &&
+                    (rsc->initialized_buffers & ~(zsclear | job->cleared)) &&
+                    util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) {
+                        perf_debug("Partial clear of Z+stencil buffer, "
+                                   "drawing a quad instead of fast clearing\n");
+                        vc4_blitter_save(vc4);
+                        util_blitter_clear(vc4->blitter,
+                                           vc4->framebuffer.width,
+                                           vc4->framebuffer.height,
+                                           1,
+                                           zsclear,
+                                           NULL, depth, stencil);
+                        buffers &= ~zsclear;
+                        if (!buffers)
+                                return;
+                        job = vc4_get_job_for_fbo(vc4);
+                }
+        }
+
        /* We can't flag new buffers for clearing once we've queued draws.  We
         * could avoid this by using the 3d engine to clear.
         */
@@ -537,29 +569,6 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
                struct vc4_resource *rsc =
                        vc4_resource(vc4->framebuffer.zsbuf->texture);
-                unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
-
-                /* Clearing ZS will clear both Z and stencil, so if we're
-                 * trying to clear just one then we need to draw a quad to do
-                 * it instead.
-                 */
-                if ((zsclear == PIPE_CLEAR_DEPTH ||
-                     zsclear == PIPE_CLEAR_STENCIL) &&
-                    (rsc->initialized_buffers & ~(zsclear | job->cleared)) &&
-                    util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) {
-                        perf_debug("Partial clear of Z+stencil buffer, "
-                                   "drawing a quad instead of fast clearing\n");
-                        vc4_blitter_save(vc4);
-                        util_blitter_clear(vc4->blitter,
-                                           vc4->framebuffer.width,
-                                           vc4->framebuffer.height,
-                                           1,
-                                           zsclear,
-                                           NULL, depth, stencil);
-                        buffers &= ~zsclear;
-                        if (!buffers)
-                                return;
-                }

                /* Though the depth buffer is stored with Z in the high 24,
                 * for this field we just need to store it in the low 24.
@@ -571,7 +580,7 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
                if (buffers & PIPE_CLEAR_STENCIL)
                        job->clear_stencil = stencil;

-                rsc->initialized_buffers |= zsclear;
+                rsc->initialized_buffers |= (buffers & PIPE_CLEAR_DEPTHSTENCIL);
        }

        job->draw_min_x = 0;
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -118,12 +118,17 @@ vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
                struct vc4_job *job = entry->data;

                struct vc4_bo **referenced_bos = job->bo_pointers.base;
+                bool found = false;
                for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
                        if (referenced_bos[i] == rsc->bo) {
-                                vc4_job_submit(vc4, job);
-                                continue;
+                                found = true;
+                                break;
                        }
                }
+                if (found) {
+                        vc4_job_submit(vc4, job);
+                        continue;
+                }

                /* Also check for the Z/color buffers, since the references to
                 * those are only added immediately before submit.
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2762,11 +2762,11 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
        vc4->dirty |= VC4_DIRTY_COMPILED_FS;

        if (vc4->rasterizer->base.flatshade &&
-            old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
+            (!old_fs || vc4->prog.fs->color_inputs != old_fs->color_inputs)) {
                vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
        }

-        if (old_fs && vc4->prog.fs->fs_inputs != old_fs->fs_inputs)
+        if (!old_fs || vc4->prog.fs->fs_inputs != old_fs->fs_inputs)
                vc4->dirty |= VC4_DIRTY_FS_INPUTS;
 }

@@ -2876,6 +2876,7 @@ fs_inputs_compare(const void *key1, const void *key2)

 static void
 delete_from_cache_if_matches(struct hash_table *ht,
+                             struct vc4_compiled_shader **last_compile,
                             struct hash_entry *entry,
                             struct vc4_uncompiled_shader *so)
 {
@@ -2885,6 +2886,10 @@ delete_from_cache_if_matches(struct hash_table *ht,
                struct vc4_compiled_shader *shader = entry->data;
                _mesa_hash_table_remove(ht, entry);
                vc4_bo_unreference(&shader->bo);
+
+                if (shader == *last_compile)
+                        *last_compile = NULL;
+
                ralloc_free(shader);
        }
 }
@@ -2896,10 +2901,14 @@ vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
        struct vc4_uncompiled_shader *so = hwcso;

        struct hash_entry *entry;
-        hash_table_foreach(vc4->fs_cache, entry)
-                delete_from_cache_if_matches(vc4->fs_cache, entry, so);
-        hash_table_foreach(vc4->vs_cache, entry)
-                delete_from_cache_if_matches(vc4->vs_cache, entry, so);
+        hash_table_foreach(vc4->fs_cache, entry) {
+                delete_from_cache_if_matches(vc4->fs_cache, &vc4->prog.fs,
+                                             entry, so);
+        }
+        hash_table_foreach(vc4->vs_cache, entry) {
+                delete_from_cache_if_matches(vc4->vs_cache, &vc4->prog.vs,
+                                             entry, so);
+        }

        ralloc_free(so->base.ir.nir);
        free(so);
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -373,7 +373,6 @@ vc4_resource_destroy(struct pipe_screen *pscreen,
 {
        struct vc4_screen *screen = vc4_screen(pscreen);
        struct vc4_resource *rsc = vc4_resource(prsc);
-        pipe_resource_reference(&rsc->shadow_parent, NULL);
        vc4_bo_unreference(&rsc->bo);

        if (rsc->scanout)
@@ -1078,19 +1077,21 @@ vc4_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)

 void
 vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
-                                    struct pipe_sampler_view *view)
+                                    struct pipe_sampler_view *pview)
 {
+        struct vc4_sampler_view *view = vc4_sampler_view(pview);
        struct vc4_resource *shadow = vc4_resource(view->texture);
-        struct vc4_resource *orig = vc4_resource(shadow->shadow_parent);
-        assert(orig);
+        struct vc4_resource *orig = vc4_resource(pview->texture);
+
+        assert(view->texture != pview->texture);

        if (shadow->writes == orig->writes && orig->bo->private)
                return;

        perf_debug("Updating %dx%d@%d shadow texture due to %s\n",
                   orig->base.width0, orig->base.height0,
-                   view->u.tex.first_level,
-                   view->u.tex.first_level ? "base level" : "raster layout");
+                   pview->u.tex.first_level,
+                   pview->u.tex.first_level ? "base level" : "raster layout");

        for (int i = 0; i <= shadow->base.last_level; i++) {
                unsigned width = u_minify(shadow->base.width0, i);
@@ -1111,7 +1112,7 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
                        },
                        .src = {
                                .resource = &orig->base,
-                                .level = view->u.tex.first_level + i,
+                                .level = pview->u.tex.first_level + i,
                                .box = {
                                        .x = 0,
                                        .y = 0,
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -81,20 +81,6 @@ struct vc4_resource {
         * buffer) may get marked.
         */
        uint32_t initialized_buffers;
-
-        /**
-         * Resource containing the non-GL_TEXTURE_BASE_LEVEL-rebased texture
-         * contents, or the 4-byte index buffer.
-         *
-         * If the parent is set for an texture, then this resource is actually
-         * the texture contents just starting from the sampler_view's
-         * first_level.
-         *
-         * If the parent is set for an index index buffer, then this resource
-         * is actually a shadow containing a 2-byte index buffer starting from
-         * the ib's offset.
-         */
-        struct pipe_resource *shadow_parent;
 };

 static inline struct vc4_resource *
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -556,6 +556,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
        so->base = *cso;

        pipe_reference(NULL, &prsc->reference);
+        so->base.texture = prsc;
+        so->base.reference.count = 1;
+        so->base.context = pctx;

        /* There is no hardware level clamping, and the start address of a
         * texture may be misaligned, so in that case we have to copy to a
@@ -567,33 +570,36 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
        if ((cso->u.tex.first_level &&
             (cso->u.tex.first_level != cso->u.tex.last_level)) ||
            rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
-                struct vc4_resource *shadow_parent = vc4_resource(prsc);
-                struct pipe_resource tmpl = shadow_parent->base;
-                struct vc4_resource *clone;
+                struct vc4_resource *shadow_parent = rsc;
+                struct pipe_resource tmpl = *prsc;

                tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
                tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
                tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
                tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;

+                /* Create the shadow texture.  The rest of the texture
+                 * parameter setup will use the shadow.
+                 */
                prsc = vc4_resource_create(pctx->screen, &tmpl);
                if (!prsc) {
                        free(so);
                        return NULL;
                }
                rsc = vc4_resource(prsc);
-                clone = vc4_resource(prsc);
-                clone->shadow_parent = &shadow_parent->base;
-                /* Flag it as needing update of the contents from the parent. */
-                clone->writes = shadow_parent->writes - 1;

-                assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
-        } else if (cso->u.tex.first_level) {
-                so->force_first_level = true;
+                /* Flag it as needing update of the contents from the parent. */
+                rsc->writes = shadow_parent->writes - 1;
+                assert(rsc->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
+
+                so->texture = prsc;
+        } else {
+                pipe_resource_reference(&so->texture, prsc);
+
+                if (cso->u.tex.first_level) {
+                        so->force_first_level = true;
+                }
        }
-        so->base.texture = prsc;
-        so->base.reference.count = 1;
-        so->base.context = pctx;

        so->texture_p0 =
                (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
@@ -617,8 +623,10 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,

 static void
 vc4_sampler_view_destroy(struct pipe_context *pctx,
-                         struct pipe_sampler_view *view)
+                         struct pipe_sampler_view *pview)
 {
+        struct vc4_sampler_view *view = vc4_sampler_view(pview);
+        pipe_resource_reference(&pview->texture, NULL);
        pipe_resource_reference(&view->texture, NULL);
        free(view);
 }
--- a/src/gallium/drivers/vc4/vc4_uniforms.c
+++ b/src/gallium/drivers/vc4/vc4_uniforms.c
@@ -35,7 +35,7 @@ write_texture_p0(struct vc4_job *job,
 {
        struct vc4_sampler_view *sview =
                vc4_sampler_view(texstate->textures[unit]);
-        struct vc4_resource *rsc = vc4_resource(sview->base.texture);
+        struct vc4_resource *rsc = vc4_resource(sview->texture);

        cl_reloc(job, &job->uniforms, uniforms, rsc->bo, sview->texture_p0);
 }
--- a/src/gallium/state_trackers/va/postproc.c
+++ b/src/gallium/state_trackers/va/postproc.c
@@ -35,7 +35,7 @@
 #include "va_private.h"

 static const VARectangle *
-vlVaRegionDefault(const VARectangle *region, struct pipe_video_buffer *buf,
+vlVaRegionDefault(const VARectangle *region, vlVaSurface *surf,
 		  VARectangle *def)
 {
   if (region)
@@ -43,8 +43,8 @@ vlVaRegionDefault(const VARectangle *region, struct pipe_video_buffer *buf,

   def->x = 0;
   def->y = 0;
-   def->width = buf->width;
-   def->height = buf->height;
+   def->width = surf->templat.width;
+   def->height = surf->templat.height;

   return def;
 }
@@ -230,7 +230,7 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
   const VARectangle *src_region, *dst_region;
   VAProcPipelineParameterBuffer *param;
   struct pipe_video_buffer *src;
-   vlVaSurface *src_surface;
+   vlVaSurface *src_surface, *dst_surface;
   unsigned i;

   if (!drv || !context)
@@ -245,6 +245,8 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
   param = buf->data;

   src_surface = handle_table_get(drv->htab, param->surface);
+   dst_surface = handle_table_get(drv->htab, context->target_id);
+
   if (!src_surface || !src_surface->buffer)
      return VA_STATUS_ERROR_INVALID_SURFACE;

@@ -290,8 +292,8 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
      }
   }

-   src_region = vlVaRegionDefault(param->surface_region, src_surface->buffer, &def_src_region);
-   dst_region = vlVaRegionDefault(param->output_region, context->target, &def_dst_region);
+   src_region = vlVaRegionDefault(param->surface_region, src_surface, &def_src_region);
+   dst_region = vlVaRegionDefault(param->output_region, dst_surface, &def_dst_region);

   if (context->target->buffer_format != PIPE_FORMAT_NV12 &&
       context->target->buffer_format != PIPE_FORMAT_P016)
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -804,9 +804,17 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
      break;
   }

-   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_inc:
-   case nir_intrinsic_atomic_counter_dec: {
+   case nir_intrinsic_atomic_counter_dec:
+   case nir_intrinsic_atomic_counter_read:
+   case nir_intrinsic_atomic_counter_add:
+   case nir_intrinsic_atomic_counter_min:
+   case nir_intrinsic_atomic_counter_max:
+   case nir_intrinsic_atomic_counter_and:
+   case nir_intrinsic_atomic_counter_or:
+   case nir_intrinsic_atomic_counter_xor:
+   case nir_intrinsic_atomic_counter_exchange:
+   case nir_intrinsic_atomic_counter_comp_swap: {
      unsigned surf_index = prog_data->base.binding_table.abo_start +
         (unsigned) instr->const_index[0];
      const vec4_builder bld =
--- a/src/intel/compiler/brw_vec4_surface_builder.cpp
+++ b/src/intel/compiler/brw_vec4_surface_builder.cpp
@@ -212,10 +212,15 @@ namespace brw {
         const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
         const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);

-         if (size >= 1)
-            bld.MOV(writemask(srcs, WRITEMASK_X), src0);
-         if (size >= 2)
-            bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
+         if (size >= 1) {
+            bld.MOV(writemask(srcs, WRITEMASK_X),
+                    swizzle(src0, BRW_SWIZZLE_XXXX));
+         }
+
+         if (size >= 2) {
+            bld.MOV(writemask(srcs, WRITEMASK_Y),
+                    swizzle(src1, BRW_SWIZZLE_XXXX));
+         }

         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
                          emit_insert(bld, addr, dims, has_simd4x2),
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -764,7 +764,7 @@ void anv_UpdateDescriptorSets(

   for (uint32_t i = 0; i < descriptorCopyCount; i++) {
      const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
-      ANV_FROM_HANDLE(anv_descriptor_set, src, copy->dstSet);
+      ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet);
      ANV_FROM_HANDLE(anv_descriptor_set, dst, copy->dstSet);

      const struct anv_descriptor_set_binding_layout *src_layout =
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -250,6 +250,15 @@ static const struct anv_format anv_formats[] = {

 #undef fmt

+static bool /* true when vk_format has a usable entry in anv_formats[] */
+format_supported(VkFormat vk_format)
+{
+   if (vk_format >= ARRAY_SIZE(anv_formats)) /* bounds check before the table lookup below */
+      return false;
+   /* In range: supported unless the entry is marked ISL_FORMAT_UNSUPPORTED. */
+   return anv_formats[vk_format].isl_format != ISL_FORMAT_UNSUPPORTED;
+}
+
 /**
 * Exactly one bit must be set in \a aspect.
 */
@@ -257,10 +266,10 @@ struct anv_format
 anv_get_format(const struct gen_device_info *devinfo, VkFormat vk_format,
               VkImageAspectFlags aspect, VkImageTiling tiling)
 {
-   struct anv_format format = anv_formats[vk_format];
+   if (!format_supported(vk_format))
+      return anv_formats[VK_FORMAT_UNDEFINED];

-   if (format.isl_format == ISL_FORMAT_UNSUPPORTED)
-      return format;
+   struct anv_format format = anv_formats[vk_format];

   if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
      assert(vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT);
@@ -391,7 +400,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d
      gen += 5;

   VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
-   if (anv_formats[format].isl_format == ISL_FORMAT_UNSUPPORTED) {
+   if (!format_supported(format)) {
      /* Nothing to do here */
   } else if (vk_format_is_depth_or_stencil(format)) {
      tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
@@ -489,7 +498,7 @@ anv_get_image_format_properties(
   uint32_t maxArraySize;
   VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;

-   if (anv_formats[info->format].isl_format == ISL_FORMAT_UNSUPPORTED)
+   if (!format_supported(info->format))
      goto unsupported;

   anv_physical_device_get_format_properties(physical_device, info->format,
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -49,10 +49,10 @@ gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
      struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
         .ViewportMatrixElementm00 = vp->width / 2,
         .ViewportMatrixElementm11 = vp->height / 2,
-         .ViewportMatrixElementm22 = 1.0,
+         .ViewportMatrixElementm22 = vp->maxDepth - vp->minDepth,
         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
-         .ViewportMatrixElementm32 = 0.0,
+         .ViewportMatrixElementm32 = vp->minDepth,
         .XMinClipGuardband = -1.0f,
         .XMaxClipGuardband = 1.0f,
         .YMinClipGuardband = -1.0f,
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -133,6 +133,8 @@ blorp_surf_for_miptree(struct brw_context *brw,
                       unsigned start_layer, unsigned num_layers,
                       struct isl_surf tmp_surfs[1])
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
   if (mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) {
      const unsigned num_samples = mt->surf.samples;
      for (unsigned i = 0; i < num_layers; i++) {
@@ -163,6 +165,10 @@ blorp_surf_for_miptree(struct brw_context *brw,
   else if (mt->hiz_buf)
      aux_surf = &mt->hiz_buf->surf;

+   if (mt->format == MESA_FORMAT_S_UINT8 && is_render_target &&
+       devinfo->gen <= 7)
+      mt->r8stencil_needs_update = true;
+
   if (surf->aux_usage == ISL_AUX_USAGE_HIZ &&
       !intel_miptree_level_has_hiz(mt, *level))
      surf->aux_usage = ISL_AUX_USAGE_NONE;
--- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
+++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
@@ -953,8 +953,10 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
    */
   if (!map && !(flags & MAP_RAW)) {
-      perf_debug("Fallback GTT mapping for %s with access flags %x\n",
-                 bo->name, flags);
+      if (brw) {
+         perf_debug("Fallback GTT mapping for %s with access flags %x\n",
+                    bo->name, flags);
+      }
      map = brw_bo_map_gtt(brw, bo, flags);
   }

--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1617,6 +1617,13 @@ enum brw_pixel_shader_coverage_mask_mode {
 # define GEN8_HIZ_PMA_MASK_BITS \
   REG_MASK(GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE)

+#define GEN7_GT_MODE                    0x7008
+# define GEN9_SUBSLICE_HASHING_8x8      (0 << 8)
+# define GEN9_SUBSLICE_HASHING_16x4     (1 << 8)
+# define GEN9_SUBSLICE_HASHING_8x4      (2 << 8)
+# define GEN9_SUBSLICE_HASHING_16x16    (3 << 8)
+# define GEN9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8)
+
 /* Predicate registers */
 #define MI_PREDICATE_SRC0               0x2400
 #define MI_PREDICATE_SRC1               0x2408
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -72,6 +72,15 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
                GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
+
+      if (brw->is_broxton) {
+         BEGIN_BATCH(3);
+         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+         OUT_BATCH(GEN7_GT_MODE);
+         OUT_BATCH(GEN9_SUBSLICE_HASHING_MASK_BITS |
+                   GEN9_SUBSLICE_HASHING_16x16);
+         ADVANCE_BATCH();
+      }
   }

   if (brw->gen >= 8) {
--- a/src/mesa/drivers/dri/nouveau/nouveau_gldefs.h
+++ b/src/mesa/drivers/dri/nouveau/nouveau_gldefs.h
@@ -238,6 +238,25 @@ nvgl_wrap_mode(unsigned wrap)
 	}
 }

+static inline unsigned /* maps a GL texture wrap enum to the code nv20_emit_tex_obj shifts into tx_wrap */
+nvgl_wrap_mode_nv20(unsigned wrap)
+{
+	switch (wrap) { /* NOTE(review): values presumably match the NV20 TEX_WRAP field encoding - confirm against the hw headers */
+	case GL_REPEAT:
+		return 0x1;
+	case GL_MIRRORED_REPEAT:
+		return 0x2;
+	case GL_CLAMP:
+		return 0x5; /* codes do not follow case order: GL_CLAMP gets the highest value */
+	case GL_CLAMP_TO_EDGE:
+		return 0x3;
+	case GL_CLAMP_TO_BORDER:
+		return 0x4;
+	default:
+		unreachable("Bad GL texture wrap mode"); /* any other enum is treated as a caller bug */
+	}
+}
+
 static inline unsigned
 nvgl_filter_mode(unsigned filter)
 {
--- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
@@ -193,9 +193,19 @@ nv20_emit_tex_obj(struct gl_context *ctx, int emit)
 		| NV20_3D_TEX_FORMAT_NO_BORDER
 		| 1 << 16;

-	tx_wrap = nvgl_wrap_mode(sa->WrapR) << 16
-		| nvgl_wrap_mode(sa->WrapT) << 8
-		| nvgl_wrap_mode(sa->WrapS) << 0;
+	switch (t->Target) {
+	case GL_TEXTURE_1D:
+		tx_wrap = NV20_3D_TEX_WRAP_R_CLAMP_TO_EDGE
+			| NV20_3D_TEX_WRAP_T_CLAMP_TO_EDGE
+			| nvgl_wrap_mode_nv20(sa->WrapS) << 0;
+		break;
+
+	default:
+		tx_wrap = nvgl_wrap_mode_nv20(sa->WrapR) << 16
+			| nvgl_wrap_mode_nv20(sa->WrapT) << 8
+			| nvgl_wrap_mode_nv20(sa->WrapS) << 0;
+		break;
+	}

 	tx_filter = nvgl_filter_mode(sa->MagFilter) << 24
 		| nvgl_filter_mode(sa->MinFilter) << 16
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1324,6 +1324,8 @@ _mesa_free_context_data( struct gl_context *ctx )
   _mesa_reference_program(ctx, &ctx->FragmentProgram._Current, NULL);
   _mesa_reference_program(ctx, &ctx->FragmentProgram._TexEnvProgram, NULL);

+   _mesa_reference_program(ctx, &ctx->ComputeProgram._Current, NULL);
+
   _mesa_reference_vao(ctx, &ctx->Array.VAO, NULL);
   _mesa_reference_vao(ctx, &ctx->Array.DefaultVAO, NULL);

--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -175,7 +175,7 @@ try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,

      if (view_target != PIPE_TEXTURE_3D) {
         templ.u.tex.first_layer = surface->u.tex.first_layer;
-         templ.u.tex.last_layer = templ.u.tex.last_layer;
+         templ.u.tex.last_layer = templ.u.tex.first_layer;
      } else {
         addr.constants.layer_offset = surface->u.tex.first_layer;
      }
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -198,9 +198,14 @@ st_draw_vbo(struct gl_context *ctx,

   /* do actual drawing */
   for (i = 0; i < nr_prims; i++) {
+      info.count = prims[i].count;
+
+      /* Skip no-op draw calls. */
+      if (!info.count && !tfb_vertcount)
+         continue;
+
      info.mode = translate_prim(ctx, prims[i].mode);
      info.start = start + prims[i].start;
-      info.count = prims[i].count;
      info.start_instance = prims[i].base_instance;
      info.instance_count = prims[i].num_instances;
      info.index_bias = prims[i].basevertex;
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp.orig
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp.orig
@@ -1,479 +0,0 @@
-/*
- * Copyright © 2015 Red Hat
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "st_nir.h"
-
-#include "pipe/p_defines.h"
-#include "pipe/p_screen.h"
-#include "pipe/p_context.h"
-
-#include "program/program.h"
-#include "program/prog_statevars.h"
-#include "program/prog_parameter.h"
-#include "program/ir_to_mesa.h"
-#include "main/mtypes.h"
-#include "main/errors.h"
-#include "main/shaderapi.h"
-#include "main/uniforms.h"
-#include "util/string_to_uint_map.h"
-
-#include "st_context.h"
-#include "st_program.h"
-
-#include "compiler/nir/nir.h"
-#include "compiler/glsl_types.h"
-#include "compiler/glsl/glsl_to_nir.h"
-#include "compiler/glsl/ir.h"
-
-
-static int
-type_size(const struct glsl_type *type)
-{
-   return type->count_attribute_slots(false);
-}
-
-/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
- * may need to fix up varying slots so the glsl->nir path is aligned
- * with the anything->tgsi->nir path.
- */
-static void
-st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
-{
-   if (st->needs_texcoord_semantic)
-      return;
-
-   nir_foreach_variable(var, var_list) {
-      if (var->data.location >= VARYING_SLOT_VAR0) {
-         var->data.location += 9;
-      } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
-               (var->data.location <= VARYING_SLOT_TEX7)) {
-         var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
-      }
-   }
-}
-
-/* input location assignment for VS inputs must be handled specially, so
- * that it is aligned w/ st's vbo state.
- * (This isn't the case with, for ex, FS inputs, which only need to agree
- * on varying-slot w/ the VS outputs)
- */
-static void
-st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir)
-{
-   unsigned attr, num_inputs = 0;
-   unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
-
-   /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
-   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
-      if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
-         input_to_index[attr] = num_inputs;
-         num_inputs++;
-         if ((prog->info.double_inputs_read & BITFIELD64_BIT(attr)) != 0) {
-            /* add placeholder for second part of a double attribute */
-            num_inputs++;
-         }
-      } else {
-         input_to_index[attr] = ~0;
-      }
-   }
-
-   /* bit of a hack, mirroring st_translate_vertex_program */
-   input_to_index[VERT_ATTRIB_EDGEFLAG] = num_inputs;
-
-   nir->num_inputs = 0;
-   nir_foreach_variable_safe(var, &nir->inputs) {
-      attr = var->data.location;
-      assert(attr < ARRAY_SIZE(input_to_index));
-
-      if (input_to_index[attr] != ~0u) {
-         var->data.driver_location = input_to_index[attr];
-         nir->num_inputs++;
-      } else {
-         /* Move unused input variables to the globals list (with no
-          * initialization), to avoid confusing drivers looking through the
-          * inputs array and expecting to find inputs with a driver_location
-          * set.
-          */
-         exec_node_remove(&var->node);
-         var->data.mode = nir_var_global;
-         exec_list_push_tail(&nir->globals, &var->node);
-      }
-   }
-}
-
-static int
-st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
-                              const char *name)
-{
-   int loc = _mesa_lookup_parameter_index(params, name);
-
-   /* is there a better way to do this?  If we have something like:
-    *
-    *    struct S {
-    *           float f;
-    *           vec4 v;
-    *    };
-    *    uniform S color;
-    *
-    * Then what we get in prog->Parameters looks like:
-    *
-    *    0: Name=color.f, Type=6, DataType=1406, Size=1
-    *    1: Name=color.v, Type=6, DataType=8b52, Size=4
-    *
-    * So the name doesn't match up and _mesa_lookup_parameter_index()
-    * fails.  In this case just find the first matching "color.*"..
-    *
-    * Note for arrays you could end up w/ color[n].f, for example.
-    *
-    * glsl_to_tgsi works slightly differently in this regard.  It is
-    * emitting something more low level, so it just translates the
-    * params list 1:1 to CONST[] regs.  Going from GLSL IR to TGSI,
-    * it just calculates the additional offset of struct field members
-    * in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or
-    * glsl_to_tgsi_visitor::visit(ir_dereference_array *ir).  It never
-    * needs to work backwards to get base var loc from the param-list
-    * which already has them separated out.
-    */
-   if (loc < 0) {
-      int namelen = strlen(name);
-      for (unsigned i = 0; i < params->NumParameters; i++) {
-         struct gl_program_parameter *p = &params->Parameters[i];
-         if ((strncmp(p->Name, name, namelen) == 0) &&
-             ((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) {
-            loc = i;
-            break;
-         }
-      }
-   }
-
-   return loc;
-}
-
-static void
-st_nir_assign_uniform_locations(struct gl_program *prog,
-                                struct gl_shader_program *shader_program,
-                                struct exec_list *uniform_list, unsigned *size)
-{
-   int max = 0;
-   int shaderidx = 0;
-
-   nir_foreach_variable(uniform, uniform_list) {
-      int loc;
-
-      /*
-       * UBO's have their own address spaces, so don't count them towards the
-       * number of global uniforms
-       */
-      if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) &&
-          uniform->interface_type != NULL)
-         continue;
-
-      if (uniform->type->is_sampler()) {
-         unsigned val = 0;
-         bool found = shader_program->UniformHash->get(val, uniform->name);
-         loc = shaderidx++;
-         assert(found);
-         (void) found; /* silence unused var warning */
-         /* this ensure that nir_lower_samplers looks at the correct
-          * shader_program->UniformStorage[location]:
-          */
-         uniform->data.location = val;
-      } else if (strncmp(uniform->name, "gl_", 3) == 0) {
-         const gl_state_index *const stateTokens = (gl_state_index *)uniform->state_slots[0].tokens;
-         /* This state reference has already been setup by ir_to_mesa, but we'll
-          * get the same index back here.
-          */
-         loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
-      } else {
-         loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
-      }
-
-      uniform->data.driver_location = loc;
-
-      max = MAX2(max, loc + type_size(uniform->type));
-   }
-   *size = max;
-}
-
-extern "C" {
-
-/* First half of converting glsl_to_nir.. this leaves things in a pre-
- * nir_lower_io state, so that shader variants can more easily insert/
- * replace variables, etc.
- */
-nir_shader *
-st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
-               struct gl_shader_program *shader_program,
-               gl_shader_stage stage)
-{
-   struct pipe_screen *pscreen = st->pipe->screen;
-   enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
-   const nir_shader_compiler_options *options;
-   nir_shader *nir;
-
-   assert(pscreen->get_compiler_options);   /* drivers using NIR must implement this */
-
-   options = (const nir_shader_compiler_options *)
-      pscreen->get_compiler_options(pscreen, PIPE_SHADER_IR_NIR, ptarget);
-   assert(options);
-
-   if (prog->nir)
-      return prog->nir;
-
-   nir = glsl_to_nir(shader_program, stage, options);
-
-   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
-         nir_shader_get_entrypoint(nir),
-         true, true);
-   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
-   NIR_PASS_V(nir, nir_split_var_copies);
-   NIR_PASS_V(nir, nir_lower_var_copies);
-   NIR_PASS_V(nir, st_nir_lower_builtin);
-   NIR_PASS_V(nir, nir_lower_atomics, shader_program);
-
-   /* fragment shaders may need : */
-   if (stage == MESA_SHADER_FRAGMENT) {
-      static const gl_state_index wposTransformState[STATE_LENGTH] = {
-         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
-      };
-      nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
-      struct pipe_screen *pscreen = st->pipe->screen;
-
-      memcpy(wpos_options.state_tokens, wposTransformState,
-             sizeof(wpos_options.state_tokens));
-      wpos_options.fs_coord_origin_upper_left =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
-      wpos_options.fs_coord_origin_lower_left =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
-      wpos_options.fs_coord_pixel_center_integer =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
-      wpos_options.fs_coord_pixel_center_half_integer =
-         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
-
-      if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
-         nir_validate_shader(nir);
-         _mesa_add_state_reference(prog->Parameters, wposTransformState);
-      }
-   }
-
-   if (st->ctx->_Shader->Flags & GLSL_DUMP) {
-      _mesa_log("\n");
-      _mesa_log("NIR IR for linked %s program %d:\n",
-             _mesa_shader_stage_to_string(stage),
-             shader_program->Name);
-      nir_print_shader(nir, _mesa_get_log_file());
-      _mesa_log("\n\n");
-   }
-
-   prog->nir = nir;
-
-   return nir;
-}
-
-/* TODO any better helper somewhere to sort a list? */
-
-static void
-insert_sorted(struct exec_list *var_list, nir_variable *new_var)
-{
-   nir_foreach_variable(var, var_list) {
-      if (var->data.location > new_var->data.location) {
-         exec_node_insert_node_before(&var->node, &new_var->node);
-         return;
-      }
-   }
-   exec_list_push_tail(var_list, &new_var->node);
-}
-
-static void
-sort_varyings(struct exec_list *var_list)
-{
-   struct exec_list new_list;
-   exec_list_make_empty(&new_list);
-   nir_foreach_variable_safe(var, var_list) {
-      exec_node_remove(&var->node);
-      insert_sorted(&new_list, var);
-   }
-   exec_list_move_nodes_to(&new_list, var_list);
-}
-
-/* Second half of preparing nir from glsl, which happens after shader
- * variant lowering.
- */
-void
-st_finalize_nir(struct st_context *st, struct gl_program *prog, nir_shader *nir)
-{
-   struct pipe_screen *screen = st->pipe->screen;
-
-   NIR_PASS_V(nir, nir_split_var_copies);
-   NIR_PASS_V(nir, nir_lower_var_copies);
-   NIR_PASS_V(nir, nir_lower_io_types);
-
-   if (nir->stage == MESA_SHADER_VERTEX) {
-      /* Needs special handling so drvloc matches the vbo state: */
-      st_nir_assign_vs_in_locations(prog, nir);
-      /* Re-lower global vars, to deal with any dead VS inputs. */
-      NIR_PASS_V(nir, nir_lower_global_vars_to_local);
-
-      sort_varyings(&nir->outputs);
-      nir_assign_var_locations(&nir->outputs,
-                               &nir->num_outputs,
-                               type_size);
-      st_nir_fixup_varying_slots(st, &nir->outputs);
-   } else if (nir->stage == MESA_SHADER_FRAGMENT) {
-      sort_varyings(&nir->inputs);
-      nir_assign_var_locations(&nir->inputs,
-                               &nir->num_inputs,
-                               type_size);
-      st_nir_fixup_varying_slots(st, &nir->inputs);
-      nir_assign_var_locations(&nir->outputs,
-                               &nir->num_outputs,
-                               type_size);
-   } else if (nir->stage == MESA_SHADER_COMPUTE) {
-       /* TODO? */
-   } else {
-      unreachable("invalid shader type for tgsi bypass\n");
-   }
-
-   struct gl_shader_program *shader_program;
-   switch (nir->stage) {
-   case MESA_SHADER_VERTEX:
-      shader_program = ((struct st_vertex_program *)prog)->shader_program;
-      break;
-   case MESA_SHADER_FRAGMENT:
-      shader_program = ((struct st_fragment_program *)prog)->shader_program;
-      break;
-   case MESA_SHADER_COMPUTE:
-      shader_program = ((struct st_compute_program *)prog)->shader_program;
-      break;
-   default:
-      assert(!"should not be reached");
-      return;
-   }
-
-   NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
-         st->ctx->Const.Program[nir->stage].MaxAtomicBuffers);
-
-   st_nir_assign_uniform_locations(prog, shader_program,
-                                   &nir->uniforms, &nir->num_uniforms);
-
-   NIR_PASS_V(nir, nir_lower_system_values);
-
-   if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
-      NIR_PASS_V(nir, nir_lower_samplers_as_deref, shader_program);
-   else
-      NIR_PASS_V(nir, nir_lower_samplers, shader_program);
-}
-
-struct gl_program *
-st_nir_get_mesa_program(struct gl_context *ctx,
-                        struct gl_shader_program *shader_program,
-                        struct gl_linked_shader *shader)
-{
-   struct gl_program *prog;
-
-   validate_ir_tree(shader->ir);
-
-   prog = shader->Program;
-
-   prog->Parameters = _mesa_new_parameter_list();
-
-   do_set_program_inouts(shader->ir, prog, shader->Stage);
-
-   _mesa_copy_linked_program_data(shader_program, shader);
-   _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
-                                               prog->Parameters);
-
-   /* Make a pass over the IR to add state references for any built-in
-    * uniforms that are used.  This has to be done now (during linking).
-    * Code generation doesn't happen until the first time this shader is
-    * used for rendering.  Waiting until then to generate the parameters is
-    * too late.  At that point, the values for the built-in uniforms won't
-    * get sent to the shader.
-    */
-   foreach_in_list(ir_instruction, node, shader->ir) {
-      ir_variable *var = node->as_variable();
-
-      if ((var == NULL) || (var->data.mode != ir_var_uniform) ||
-          (strncmp(var->name, "gl_", 3) != 0))
-         continue;
-
-      const ir_state_slot *const slots = var->get_state_slots();
-      assert(slots != NULL);
-
-      for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
-         _mesa_add_state_reference(prog->Parameters,
-                                   (gl_state_index *) slots[i].tokens);
-      }
-   }
-
-   if (ctx->_Shader->Flags & GLSL_DUMP) {
-      _mesa_log("\n");
-      _mesa_log("GLSL IR for linked %s program %d:\n",
-             _mesa_shader_stage_to_string(shader->Stage),
-             shader_program->Name);
-      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
-      _mesa_log("\n\n");
-   }
-
-   prog->ShadowSamplers = shader->shadow_samplers;
-   prog->ExternalSamplersUsed = gl_external_samplers(prog);
-   _mesa_update_shader_textures_used(shader_program, prog);
-
-   /* Avoid reallocation of the program parameter list, because the uniform
-    * storage is only associated with the original parameter list.
-    * This should be enough for Bitmap and DrawPixels constants.
-    */
-   _mesa_reserve_parameter_storage(prog->Parameters, 8);
-
-   /* This has to be done last.  Any operation the can cause
-    * prog->ParameterValues to get reallocated (e.g., anything that adds a
-    * program constant) has to happen before creating this linkage.
-    */
-   _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
-
-   struct st_vertex_program *stvp;
-   struct st_fragment_program *stfp;
-   struct st_compute_program *stcp;
-
-   switch (shader->Stage) {
-   case MESA_SHADER_VERTEX:
-      stvp = (struct st_vertex_program *)prog;
-      stvp->shader_program = shader_program;
-      break;
-   case MESA_SHADER_FRAGMENT:
-      stfp = (struct st_fragment_program *)prog;
-      stfp->shader_program = shader_program;
-      break;
-   case MESA_SHADER_COMPUTE:
-      stcp = (struct st_compute_program *)prog;
-      stcp->shader_program = shader_program;
-      break;
-   default:
-      assert(!"should not be reached");
-      return NULL;
-   }
-
-   return prog;
-}
-
-} /* extern "C" */
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -456,6 +456,7 @@ public:
   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
+   st_src_reg st_src_reg_for_int64(int64_t val);
   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);

   /**
@@ -1225,6 +1226,19 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
   return src;
 }

+st_src_reg
+glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val)
+{
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64);
+   union gl_constant_value uval[2];
+
+   memcpy(uval, &val, sizeof(uval));
+   src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
+   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+
+   return src;
+}
+
 st_src_reg
 glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
 {
@@ -2460,7 +2474,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
      break;
   }
   case ir_unop_i642b:
-      emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
+      emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0));
      break;
   case ir_unop_i642f:
      emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
@@ -2843,7 +2857,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
   ir->array->accept(this);
   src = this->result;

-   if (ir->array->ir_type != ir_type_dereference_array) {
+   if (!src.has_index2) {
      switch (this->prog->Target) {
      case GL_TESS_CONTROL_PROGRAM_NV:
         is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
--- a/src/mesa/vbo/vbo_minmax_index.c
+++ b/src/mesa/vbo/vbo_minmax_index.c
@@ -245,6 +245,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
      _mesa_primitive_restart_index(ctx, ib->index_size);
   const char *indices;
   GLuint i;
+   GLintptr offset = 0;

   indices = (char *) ib->ptr + prim->start * ib->index_size;
   if (_mesa_is_bufferobj(ib->obj)) {
@@ -254,7 +255,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
                                count, min_index, max_index))
         return;

-      indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
+      offset = (GLintptr) indices;
+      indices = ctx->Driver.MapBufferRange(ctx, offset, size,
                                           GL_MAP_READ_BIT, ib->obj,
                                           MAP_INTERNAL);
   }
@@ -337,8 +339,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
   }

   if (_mesa_is_bufferobj(ib->obj)) {
-      vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, prim->start, count,
-                             *min_index, *max_index);
+      vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset,
+                             count, *min_index, *max_index);
      ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
   }
 }
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -46,8 +46,10 @@ libmesautil_la_SOURCES = \

 libmesautil_la_LIBADD = \
 	$(CLOCK_LIB) \
-	$(ZLIB_LIBS)
+	$(ZLIB_LIBS) \
+	$(LIBATOMIC_LIBS)

+u_atomic_test_LDADD = libmesautil.la
 roundeven_test_LDADD = -lm

 check_PROGRAMS = u_atomic_test roundeven_test
--- a/src/util/strtod.c
+++ b/src/util/strtod.c
@@ -26,12 +26,12 @@

 #include <stdlib.h>

-#ifdef _GNU_SOURCE
+#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
 #include <locale.h>
 #ifdef HAVE_XLOCALE_H
 #include <xlocale.h>
-static locale_t loc;
 #endif
+static locale_t loc;
 #endif

 #include "strtod.h"
@@ -40,7 +40,7 @@ static locale_t loc;
 void
 _mesa_locale_init(void)
 {
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
   loc = newlocale(LC_CTYPE_MASK, "C", NULL);
 #endif
 }
@@ -48,7 +48,7 @@ _mesa_locale_init(void)
 void
 _mesa_locale_fini(void)
 {
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
   freelocale(loc);
 #endif
 }
@@ -60,7 +60,7 @@ _mesa_locale_fini(void)
 double
 _mesa_strtod(const char *s, char **end)
 {
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
   return strtod_l(s, end, loc);
 #else
   return strtod(s, end);
@@ -75,7 +75,7 @@ _mesa_strtod(const char *s, char **end)
 float
 _mesa_strtof(const char *s, char **end)
 {
-#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
+#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
   return strtof_l(s, end, loc);
 #elif defined(HAVE_STRTOF)
   return strtof(s, end);
--- a/src/util/u_atomic.c
+++ b/src/util/u_atomic.c
@@ -60,6 +60,20 @@ __sync_sub_and_fetch_8(uint64_t *ptr, uint64_t val)
   return r;
 }

+WEAK uint64_t
+__sync_val_compare_and_swap_8(uint64_t *ptr, uint64_t oldval, uint64_t newval)
+{
+   uint64_t r;
+
+   pthread_mutex_lock(&sync_mutex);
+   r = *ptr;
+   if (*ptr == oldval)
+      *ptr = newval;
+   pthread_mutex_unlock(&sync_mutex);
+
+   return r;
+}
+
 WEAK uint64_t
 __atomic_fetch_add_8(uint64_t *ptr, uint64_t val, int memorder)
 {
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -58,6 +58,9 @@ struct wsi_wl_display {
   struct u_vector                            formats;

   uint32_t                                     capabilities;
+
+   /* Only used for displays created by wsi_wl_display_create */
+   uint32_t                                     refcount;
 };

 struct wsi_wayland {
@@ -66,10 +69,6 @@ struct wsi_wayland {
   const VkAllocationCallbacks *alloc;
   VkPhysicalDevice physical_device;

-   pthread_mutex_t                              mutex;
-   /* Hash table of wl_display -> wsi_wl_display mappings */
-   struct hash_table *                          displays;
-
   const struct wsi_callbacks *cbs;
 };

@@ -98,7 +97,6 @@ wsi_wl_display_add_vk_format(struct wsi_wl_display *display, VkFormat format)
 static void
 drm_handle_device(void *data, struct wl_drm *drm, const char *name)
 {
-   fprintf(stderr, "wl_drm.device(%s)\n", name);
 }

 static uint32_t
@@ -149,6 +147,8 @@ static void
 drm_handle_format(void *data, struct wl_drm *drm, uint32_t wl_format)
 {
   struct wsi_wl_display *display = data;
+   if (display->formats.element_size == 0)
+      return;

   switch (wl_format) {
 #if 0
@@ -250,8 +250,10 @@ static const struct wl_registry_listener registry_listener = {
 };

 static void
-wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display)
+wsi_wl_display_finish(struct wsi_wl_display *display)
 {
+   assert(display->refcount == 0);
+
   u_vector_finish(&display->formats);
   if (display->drm)
      wl_drm_destroy(display->drm);
@@ -259,113 +261,136 @@ wsi_wl_display_destroy(struct wsi_wayland *wsi, struct wsi_wl_display *display)
      wl_proxy_wrapper_destroy(display->wl_display_wrapper);
   if (display->queue)
      wl_event_queue_destroy(display->queue);
-   vk_free(wsi->alloc, display);
 }

-static struct wsi_wl_display *
-wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display)
+static VkResult
+wsi_wl_display_init(struct wsi_wayland *wsi_wl,
+                    struct wsi_wl_display *display,
+                    struct wl_display *wl_display,
+                    bool get_format_list)
 {
-   struct wsi_wl_display *display =
-      vk_alloc(wsi->alloc, sizeof(*display), 8,
-               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
-   if (!display)
-      return NULL;
-
+   VkResult result = VK_SUCCESS;
   memset(display, 0, sizeof(*display));

-   display->wsi_wl = wsi;
+   display->wsi_wl = wsi_wl;
   display->wl_display = wl_display;

-   if (!u_vector_init(&display->formats, sizeof(VkFormat), 8))
-      goto fail;
+   if (get_format_list) {
+      if (!u_vector_init(&display->formats, sizeof(VkFormat), 8)) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail;
+      }
+   }

   display->queue = wl_display_create_queue(wl_display);
-   if (!display->queue)
+   if (!display->queue) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
+   }

   display->wl_display_wrapper = wl_proxy_create_wrapper(wl_display);
-   if (!display->wl_display_wrapper)
+   if (!display->wl_display_wrapper) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
+   }

   wl_proxy_set_queue((struct wl_proxy *) display->wl_display_wrapper,
                      display->queue);

   struct wl_registry *registry =
      wl_display_get_registry(display->wl_display_wrapper);
-   if (!registry)
+   if (!registry) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
+   }

   wl_registry_add_listener(registry, &registry_listener, display);

   /* Round-trip to get the wl_drm global */
   wl_display_roundtrip_queue(display->wl_display, display->queue);

-   if (!display->drm)
+   if (!display->drm) {
+      result = VK_ERROR_SURFACE_LOST_KHR;
      goto fail_registry;
+   }

   /* Round-trip to get wl_drm formats and capabilities */
   wl_display_roundtrip_queue(display->wl_display, display->queue);

   /* We need prime support */
-   if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME))
+   if (!(display->capabilities & WL_DRM_CAPABILITY_PRIME)) {
+      result = VK_ERROR_SURFACE_LOST_KHR;
      goto fail_registry;
+   }

   /* We don't need this anymore */
   wl_registry_destroy(registry);

-   return display;
+   display->refcount = 0;
+
+   return VK_SUCCESS;

 fail_registry:
   if (registry)
      wl_registry_destroy(registry);

 fail:
-   wsi_wl_display_destroy(wsi, display);
-   return NULL;
+   wsi_wl_display_finish(display);
+   return result;
+}
+
+static VkResult
+wsi_wl_display_create(struct wsi_wayland *wsi, struct wl_display *wl_display,
+                      struct wsi_wl_display **display_out)
+{
+   struct wsi_wl_display *display =
+      vk_alloc(wsi->alloc, sizeof(*display), 8,
+               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!display)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   VkResult result = wsi_wl_display_init(wsi, display, wl_display, true);
+   if (result != VK_SUCCESS) {
+      vk_free(wsi->alloc, display);
+      return result;
+   }
+
+   display->refcount++;
+   *display_out = display;
+
+   return result;
 }

 static struct wsi_wl_display *
-wsi_wl_get_display(struct wsi_device *wsi_device,
-                   struct wl_display *wl_display)
+wsi_wl_display_ref(struct wsi_wl_display *display)
 {
-   struct wsi_wayland *wsi =
-      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
+   display->refcount++;
+   return display;
+}

-   pthread_mutex_lock(&wsi->mutex);
+static void
+wsi_wl_display_unref(struct wsi_wl_display *display)
+{
+   if (display->refcount-- > 1)
+      return;

-   struct hash_entry *entry = _mesa_hash_table_search(wsi->displays,
-                                                      wl_display);
-   if (!entry) {
-      /* We're about to make a bunch of blocking calls.  Let's drop the
-       * mutex for now so we don't block up too badly.
-       */
-      pthread_mutex_unlock(&wsi->mutex);
-
-      struct wsi_wl_display *display = wsi_wl_display_create(wsi, wl_display);
-      if (!display)
-         return NULL;
-
-      pthread_mutex_lock(&wsi->mutex);
-
-      entry = _mesa_hash_table_search(wsi->displays, wl_display);
-      if (entry) {
-         /* Oops, someone raced us to it */
-         wsi_wl_display_destroy(wsi, display);
-      } else {
-         entry = _mesa_hash_table_insert(wsi->displays, wl_display, display);
-      }
-   }
-
-   pthread_mutex_unlock(&wsi->mutex);
-
-   return entry->data;
+   struct wsi_wayland *wsi = display->wsi_wl;
+   wsi_wl_display_finish(display);
+   vk_free(wsi->alloc, display);
 }

 VkBool32
 wsi_wl_get_presentation_support(struct wsi_device *wsi_device,
 				struct wl_display *wl_display)
 {
-   return wsi_wl_get_display(wsi_device, wl_display) != NULL;
+   struct wsi_wayland *wsi =
+      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
+   struct wsi_wl_display display;
+   VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false);
+   /* init already ran finish if it failed; finishing again would destroy twice */
+   if (ret == VK_SUCCESS)
+      wsi_wl_display_finish(&display);
+   return ret == VK_SUCCESS;
 }

 static VkResult
@@ -439,21 +464,25 @@ wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface,
                           VkSurfaceFormatKHR* pSurfaceFormats)
 {
   VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
-   struct wsi_wl_display *display =
-      wsi_wl_get_display(wsi_device, surface->display);
-   if (!display)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   struct wsi_wayland *wsi =
+      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
+
+   struct wsi_wl_display display;
+   if (wsi_wl_display_init(wsi, &display, surface->display, true))
+      return VK_ERROR_SURFACE_LOST_KHR;

   VK_OUTARRAY_MAKE(out, pSurfaceFormats, pSurfaceFormatCount);

   VkFormat *disp_fmt;
-   u_vector_foreach(disp_fmt, &display->formats) {
+   u_vector_foreach(disp_fmt, &display.formats) {
      vk_outarray_append(&out, out_fmt) {
         out_fmt->format = *disp_fmt;
         out_fmt->colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR;
      }
   }

+   wsi_wl_display_finish(&display);
+
   return vk_outarray_status(&out);
 }

@@ -465,21 +494,25 @@ wsi_wl_surface_get_formats2(VkIcdSurfaceBase *icd_surface,
                            VkSurfaceFormat2KHR* pSurfaceFormats)
 {
   VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
-   struct wsi_wl_display *display =
-      wsi_wl_get_display(wsi_device, surface->display);
-   if (!display)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   struct wsi_wayland *wsi =
+      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
+
+   struct wsi_wl_display display;
+   if (wsi_wl_display_init(wsi, &display, surface->display, true))
+      return VK_ERROR_SURFACE_LOST_KHR;

   VK_OUTARRAY_MAKE(out, pSurfaceFormats, pSurfaceFormatCount);

   VkFormat *disp_fmt;
-   u_vector_foreach(disp_fmt, &display->formats) {
+   u_vector_foreach(disp_fmt, &display.formats) {
      vk_outarray_append(&out, out_fmt) {
         out_fmt->surfaceFormat.format = *disp_fmt;
         out_fmt->surfaceFormat.colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR;
      }
   }

+   wsi_wl_display_finish(&display);
+
   return vk_outarray_status(&out);
 }

@@ -532,8 +565,8 @@ struct wsi_wl_image {
 struct wsi_wl_swapchain {
   struct wsi_swapchain                        base;

-   struct wsi_wl_display *                      display;
-   struct wl_event_queue *                      queue;
+   struct wsi_wl_display                        *display;
+
   struct wl_surface *                          surface;
   uint32_t                                     surface_version;
   struct wl_drm *                              drm_wrapper;
@@ -584,7 +617,7 @@ wsi_wl_swapchain_acquire_next_image(struct wsi_swapchain *wsi_chain,
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;

   int ret = wl_display_dispatch_queue_pending(chain->display->wl_display,
-                                               chain->queue);
+                                               chain->display->queue);
   /* XXX: I'm not sure if out-of-date is the right error here.  If
    * wl_display_dispatch_queue_pending fails it most likely means we got
    * kicked by the server so this seems more-or-less correct.
@@ -606,7 +639,7 @@ wsi_wl_swapchain_acquire_next_image(struct wsi_swapchain *wsi_chain,
       * anywhere until we get an event.
       */
      int ret = wl_display_roundtrip_queue(chain->display->wl_display,
-                                           chain->queue);
+                                           chain->display->queue);
      if (ret < 0)
         return VK_ERROR_OUT_OF_DATE_KHR;
   }
@@ -637,7 +670,7 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
   if (chain->base.present_mode == VK_PRESENT_MODE_FIFO_KHR) {
      while (!chain->fifo_ready) {
         int ret = wl_display_dispatch_queue(chain->display->wl_display,
-                                             chain->queue);
+                                             chain->display->queue);
         if (ret < 0)
            return VK_ERROR_OUT_OF_DATE_KHR;
      }
@@ -757,8 +790,9 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain,
      wl_proxy_wrapper_destroy(chain->surface);
   if (chain->drm_wrapper)
      wl_proxy_wrapper_destroy(chain->drm_wrapper);
-   if (chain->queue)
-      wl_event_queue_destroy(chain->queue);
+
+   if (chain->display)
+      wsi_wl_display_unref(chain->display);

   vk_free(pAllocator, chain);

@@ -776,6 +810,8 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
                                struct wsi_swapchain **swapchain_out)
 {
   VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
+   struct wsi_wayland *wsi =
+      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
   struct wsi_wl_swapchain *chain;
   VkResult result;

@@ -794,7 +830,6 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    */
   for (uint32_t i = 0; i < num_images; i++)
      chain->images[i].buffer = NULL;
-   chain->queue = NULL;
   chain->surface = NULL;
   chain->drm_wrapper = NULL;
   chain->frame = NULL;
@@ -815,32 +850,35 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   chain->vk_format = pCreateInfo->imageFormat;
   chain->drm_format = wl_drm_format_for_vk_format(chain->vk_format, alpha);

-   chain->display = wsi_wl_get_display(wsi_device, surface->display);
-   if (!chain->display) {
-      result = VK_ERROR_INITIALIZATION_FAILED;
-      goto fail;
-   }
-
-   chain->queue = wl_display_create_queue(chain->display->wl_display);
-   if (!chain->queue) {
-      result = VK_ERROR_INITIALIZATION_FAILED;
-      goto fail;
+   if (pCreateInfo->oldSwapchain) {
+      /* If we have an oldSwapchain parameter, copy the display struct over
+       * from the old one so we don't have to fully re-initialize it.
+       */
+      struct wsi_wl_swapchain *old_chain = (void *)pCreateInfo->oldSwapchain;
+      chain->display = wsi_wl_display_ref(old_chain->display);
+   } else {
+      chain->display = NULL;
+      result = wsi_wl_display_create(wsi, surface->display, &chain->display);
+      if (result != VK_SUCCESS)
+         goto fail;
   }

   chain->surface = wl_proxy_create_wrapper(surface->surface);
   if (!chain->surface) {
-      result = VK_ERROR_INITIALIZATION_FAILED;
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }
-   wl_proxy_set_queue((struct wl_proxy *) chain->surface, chain->queue);
+   wl_proxy_set_queue((struct wl_proxy *) chain->surface,
+                      chain->display->queue);
   chain->surface_version = wl_proxy_get_version((void *)surface->surface);

   chain->drm_wrapper = wl_proxy_create_wrapper(chain->display->drm);
   if (!chain->drm_wrapper) {
-      result = VK_ERROR_INITIALIZATION_FAILED;
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }
-   wl_proxy_set_queue((struct wl_proxy *) chain->drm_wrapper, chain->queue);
+   wl_proxy_set_queue((struct wl_proxy *) chain->drm_wrapper,
+                      chain->display->queue);

   chain->fifo_ready = true;

@@ -881,24 +919,6 @@ wsi_wl_init_wsi(struct wsi_device *wsi_device,
   wsi->physical_device = physical_device;
   wsi->alloc = alloc;
   wsi->cbs = cbs;
-   int ret = pthread_mutex_init(&wsi->mutex, NULL);
-   if (ret != 0) {
-      if (ret == ENOMEM) {
-         result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      } else {
-         /* FINISHME: Choose a better error. */
-         result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      }
-
-      goto fail_alloc;
-   }
-
-   wsi->displays = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                                           _mesa_key_pointer_equal);
-   if (!wsi->displays) {
-      result = VK_ERROR_OUT_OF_HOST_MEMORY;
-      goto fail_mutex;
-   }

   wsi->base.get_support = wsi_wl_surface_get_support;
   wsi->base.get_capabilities = wsi_wl_surface_get_capabilities;
@@ -912,11 +932,6 @@ wsi_wl_init_wsi(struct wsi_device *wsi_device,

   return VK_SUCCESS;

-fail_mutex:
-   pthread_mutex_destroy(&wsi->mutex);
-
-fail_alloc:
-   vk_free(alloc, wsi);
 fail:
   wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = NULL;

@@ -929,16 +944,8 @@ wsi_wl_finish_wsi(struct wsi_device *wsi_device,
 {
   struct wsi_wayland *wsi =
      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
+   if (!wsi)
+      return;

-   if (wsi) {
-      struct hash_entry *entry;
-      hash_table_foreach(wsi->displays, entry)
-         wsi_wl_display_destroy(wsi, entry->data);
-
-      _mesa_hash_table_destroy(wsi->displays, NULL);
-
-      pthread_mutex_destroy(&wsi->mutex);
-
-      vk_free(alloc, wsi);
-   }
+   vk_free(alloc, wsi);
 }