docs: add sha256 checksums for 13.0.6

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
docs: add release notes for 13.0.6
2017-03-20 11:54:35 +00:00 · 2017-03-20 11:42:19 +00:00 · 2017-03-17 18:13:30 +00:00 · 2017-03-15 23:59:57 +00:00 · 2017-03-15 18:02:32 +00:00 · 2017-03-15 18:02:32 +00:00
334 changed files with 8724 additions and 3061 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -82,11 +82,13 @@ LOCAL_CFLAGS += \
 	-D__STDC_LIMIT_MACROS
 endif

+ifneq ($(LOCAL_IS_HOST_MODULE),true)
 # add libdrm if there are hardware drivers
 ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES += libdrm
 endif
+endif

 LOCAL_CPPFLAGS += \
 	$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,7 +40,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
-	--disable-llvm-shared-libs \
+	--enable-llvm-shared-libs \
 	--with-egl-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
 	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
@@ -62,6 +62,7 @@ noinst_HEADERS = \
 	include/c99_math.h \
 	include/c11 \
 	include/D3D9 \
+	include/GL/wglext.h \
 	include/HaikuGL \
 	include/no_extern_c.h \
 	include/pci_ids
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.1.0-devel
+13.0.6
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,28 @@
+# Commit was picked with -x
+907ace57986733add2aebfa9dd7c83c67efed70e mapi: automake: set VISIBILITY_CFLAGS for shared glapi
+
+# Commit was reverted shortly after it landed in master
+a39ad185932eab4f25a0cb2b112c10d8700ef242 configure.ac: honour LLVM_LIBDIR when linking against LLVM
+
+# Commit fixes an earlier patch which is too invasive to be considered for stable.
+157971e450c34ec430c295ff922c2e597294aba3 i965/blit: Fix the src dimension sanity check in miptree_copy
+
+# Similar to the above - depends on the series which introduce intel_miptree_copy
+b18cd8ce2c07c2d1a666fbff1f0d92d17dd5b22c i965/miptree: Use intel_miptree_copy for maps
+
+# The commit is a backport of an identical anv one. The latter is not in stable
+# and neither is this one, since both depend on functionality which is not in stable.
+65cbb993d33976d9ee24eff01ade8ed9013617ca radv: Call nir_lower_constant_initializers.
+
+# Commit causes regression on i915, and Nicolai requested that we drop it altogether.
+963311b71fd9900351a4a9dd1cd5f5db391f7e1b mesa/main: fix version/extension checks in _mesa_ClampColor
+
+# Misnominated (only previous commit was meant to be for stable)
+36b9976e1f99e8070c67cb8a255793939db77d02 egl/wayland: Avoid race conditions when on non-main thread
+
+# The optimisation itself is broken and was removed completely
+a4393bd97fe62e8299273bae769201c5c9c816ea i965/fs: Fix the inline nir_op_pack_double optimization
+
+# There is no ANV fast_clear support in branch
+42b10b175d5e8dfb9c4c46edbc306e7fac6bd3ec anv/blorp/clear_subpass: Only set surface clear color for fast clears
+6b644e571e2344691e4d58ff0bba3ddc059c1a5d anv: Stall before fast-clear operations
--- a/bin/get-extra-pick-list.sh
+++ b/bin/get-extra-pick-list.sh
@@ -10,26 +10,28 @@
 # $ bin/get-extra-pick-list.sh | tee picklist

 # Use the last branchpoint as our limit for the search
-# XXX: there should be a better way for this
-latest_branchpoint=`git branch | grep \* | cut -c 3-`-branchpoint
+latest_branchpoint=`git merge-base origin/master HEAD`

 # Grep for commits with "cherry picked from commit" in the commit message.
 git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
 	grep "cherry picked from commit" |\
-	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' |\
-	cut -c -8 |\
+	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//'  > already_picked
+
+# For each cherry-picked commit...
+cat already_picked | cut -c -8 |\
 while read sha
 do
-	# Check if the original commit is referenced in master
+	# ... check if it's referenced (fixed) by another patch
 	git log -n1 --pretty=oneline --grep=$sha $latest_branchpoint..origin/master |\
 		cut -c -8 |\
 	while read candidate
 	do
-		# Check if the potential fix, hasn't landed in branch yet.
-		found=`git log -n1 --pretty=oneline --reverse --grep=$candidate $latest_branchpoint..HEAD |wc -l`
-		if test $found = 0
-		then
-			echo Commit $candidate might need to be picked, as it references $sha
+		# And flag up if it hasn't landed in branch yet.
+		if grep -q ^$candidate already_picked ; then
+			continue
 		fi
+		echo Commit $candidate references $sha
 	done
 done
+
+rm -f already_picked
--- a/bin/get-fixes-pick-list.sh
+++ b/bin/get-fixes-pick-list.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# Script for generating a list of candidates [referenced by a Fixes tag] for
+# cherry-picking to a stable branch
+#
+# Usage examples:
+#
+# $ bin/get-fixes-pick-list.sh
+# $ bin/get-fixes-pick-list.sh > picklist
+# $ bin/get-fixes-pick-list.sh | tee picklist
+
+# Use the last branchpoint as our limit for the search
+latest_branchpoint=`git merge-base origin/master HEAD`
+
+# List all the commits between day 1 and the branch point...
+git log --reverse --pretty=%H $latest_branchpoint > already_landed
+
+# ... and the ones cherry-picked.
+git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
+	grep "cherry picked from commit" |\
+	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//'  > already_picked
+
+# Grep for commits with Fixes tag
+git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
+while read sha
+do
+	# For each one try to extract the tag
+	fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
+	if [ "x$fixes_count" != x1 ] ; then
+		echo WARNING: Commit $sha has nore than one Fixes tag
+	fi
+	fixes=`git show $sha | grep -i "fixes:" | head -n 1`
+	# The following sed/cut combination is borrowed from GregKH
+	id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`
+
+	# Bail out if we cannot find a suitable id.
+	# Any further validation that $id is valid, and not some junk, is
+	# implied by the follow-up code.
+	if [ "x$id" = x ] ; then
+		continue
+	fi
+
+	# Check if the offending commit is in branch.
+
+	# Be that cherry-picked ...
+	# ... or landed before the branchpoint.
+	if grep -q ^$id already_picked ||
+	   grep -q ^$id already_landed ; then
+
+		# Finally nominate the fix if it hasn't landed yet.
+		if grep -q ^$sha already_picked ; then
+			continue
+		fi
+
+		echo Commit $sha fixes $id
+	fi
+
+done
+
+rm -f already_picked
+rm -f already_landed
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -8,13 +8,16 @@
 # $ bin/get-pick-list.sh > picklist
 # $ bin/get-pick-list.sh | tee picklist

+# Use the last branchpoint as our limit for the search
+latest_branchpoint=`git merge-base origin/master HEAD`
+
 # Grep for commits with "cherry picked from commit" in the commit message.
-git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
+git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
 	grep "cherry picked from commit" |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable' $latest_branchpoint..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/bin/get-typod-pick-list.sh
+++ b/bin/get-typod-pick-list.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+
+# Script for generating a list of candidates which have typos in the nomination line
+#
+# Usage examples:
+#
+# $ bin/get-typod-pick-list.sh
+# $ bin/get-typod-pick-list.sh > picklist
+# $ bin/get-typod-pick-list.sh | tee picklist
+
+# NB:
+# This script intentionally _never_ checks for a specific version tag.
+# Should we consider folding it into the original get-pick-list.sh?
+
+# Use the last branchpoint as our limit for the search
+latest_branchpoint=`git merge-base origin/master HEAD`
+
+# Grep for commits with "cherry picked from commit" in the commit message.
+git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
+	grep "cherry picked from commit" |\
+	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
+
+# Grep for commits whose CC line names mesa-dev (presumably a typo'd mesa-stable nomination).
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\
+while read sha
+do
+	# Check to see whether the patch is on the ignore list.
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q ^$sha bin/.cherry-ignore ; then
+			continue
+		fi
+	fi
+
+	# Check to see if it has already been picked over.
+	if grep -q ^$sha already_picked ; then
+		continue
+	fi
+
+	git log -n1 --pretty=oneline $sha | cat
+done
+
+rm -f already_picked
--- a/configure.ac
+++ b/configure.ac
@@ -1377,6 +1377,9 @@ AC_ARG_ENABLE([driglx-direct],
 dnl
 dnl libGL configuration per driver
 dnl
+if test "x$enable_glx" != xno; then
+    PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED])
+fi
 case "x$enable_glx" in
 xxlib | xgallium-xlib)
    # Xlib-based GLX
@@ -1390,7 +1393,6 @@ xxlib | xgallium-xlib)
    ;;
 xdri)
    # DRI-based GLX
-    PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED])

    # find the DRI deps for libGL
    dri_modules="x11 xext xdamage xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED"
@@ -1648,7 +1650,7 @@ fi
 AC_ARG_WITH([vulkan-drivers],
    [AS_HELP_STRING([--with-vulkan-drivers@<:@=DIRS...@:>@],
        [comma delimited Vulkan drivers list, e.g.
-        "intel"
+        "intel,radeon"
        @<:@default=no@:>@])],
    [with_vulkan_drivers="$withval"],
    [with_vulkan_drivers="no"])
@@ -1667,13 +1669,6 @@ AC_ARG_WITH([vulkan-icddir],
    [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
 AC_SUBST([VULKAN_ICD_INSTALL_DIR])

-AC_ARG_ENABLE([vulkan-icd-full-driver-path],
-   [AS_HELP_STRING([--disable-vulkan-icd-full-driver-path],
-                   [create Vulkan ICD files with just a .so name and no path])],
-   [vulkan_icd_driver_path="$enableval"],
-   [vulkan_icd_driver_path="yes"])
-AM_CONDITIONAL(VULKAN_ICD_DRIVER_PATH, test "x$vulkan_icd_driver_path" = xyes)
-
 if test -n "$with_vulkan_drivers"; then
    VULKAN_DRIVERS=`IFS=', '; echo $with_vulkan_drivers`
    for driver in $VULKAN_DRIVERS; do
@@ -1979,6 +1974,21 @@ if test "x$enable_opencl" = xyes; then
    if test "x$have_libelf" != xyes; then
       AC_MSG_ERROR([Clover requires libelf])
    fi
+
+    if test "x${ac_cv_cxx_compiler_gnu}" = xyes; then
+        altivec_enabled=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE([
+        #if !defined(__VEC__) || !defined(__ALTIVEC__)
+        #error "AltiVec not enabled"
+        #endif
+        ])], altivec_enabled=yes)
+
+        if test "$altivec_enabled" = yes; then
+            CLOVER_STD_OVERRIDE="-std=gnu++11"
+        fi
+        AC_SUBST([CLOVER_STD_OVERRIDE])
+    fi
+
 fi
 AM_CONDITIONAL(HAVE_CLOVER, test "x$enable_opencl" = xyes)
 AM_CONDITIONAL(HAVE_CLOVER_ICD, test "x$enable_opencl_icd" = xyes)
--- a/docs/relnotes/13.0.0.html
+++ b/docs/relnotes/13.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 13.0.0 Release Notes / TBD</h1>
+<h1>Mesa 13.0.0 Release Notes / November 1, 2016</h1>

 <p>
 Mesa 13.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+4a54d7cdc1a94a8dae05a75ccff48356406d51b0d6a64cbdc641c266e3e008eb  mesa-13.0.0.tar.gz
+94edb4ebff82066a68be79d9c2627f15995e1fe10f67ab3fc63deb842027d727  mesa-13.0.0.tar.xz
 </pre>


@@ -74,11 +75,236 @@ Note: some of the new features are only available with certain drivers.

 <h2>Bug fixes</h2>

-TBD.
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61907">Bug 61907</a> - Indirect rendering of multi-texture vertex arrays broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crahes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83036">Bug 83036</a> - [ILK]Piglit spec_ARB_copy_image_arb_copy_image-formats fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90513">Bug 90513</a> - Odd gray and red flicker in The Talos Principle on GK104</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94561">Bug 94561</a> - [llvmpipe] PIPE_CAP_VIDEO_MEMORY reports negative value on 32 bits (with 16GB ram)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94627">Bug 94627</a> - Game Risen on wine black grass</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94681">Bug 94681</a> - dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 takes 25 minutes to compile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95000">Bug 95000</a> - deqp: assert in dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.user_ptr_stride17_components2_quads1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95246">Bug 95246</a> - Segfault in glBindFramebuffer()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95419">Bug 95419</a> - [HSW][regression][bisect] RPG Maker game gives &quot;invalid floating point operation&quot; at startup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95462">Bug 95462</a> - [BXT,BSW] arb_gpu_shader_fp64 causes gpu hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95529">Bug 95529</a> - [regression, bisected] Image corruption in Chrome</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96235">Bug 96235</a> - st_nir.h:34: error: redefinition of typedef ‘nir_shader’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96285">Bug 96285</a> - Mesa build broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96299">Bug 96299</a> - [vulkan] 64 regressions due to mesa d5f2f32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96343">Bug 96343</a> - oom since st/mesa: implement PBO downloads for ReadPixels</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96346">Bug 96346</a> - [SNB,CTS] es2-cts.gtf.gl.atan regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96349">Bug 96349</a> - [CTS,SKL,BSW,BDW,KBL,BXT] es31-cts.arrays_of_arrays.interactionuniformbuffers3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96351">Bug 96351</a> - [CTS,SKL,KBL,BXT] es2-cts.gtf.gl2extensiontests.egl_image.egl_image</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regresion due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96425">Bug 96425</a> - [bisected] occasional dark render in The Talos Principle</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96484">Bug 96484</a> - [vulkan] deqp-vk.glsl.builtin.precision.sin / cos regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96504">Bug 96504</a> - [vulkancts] compute tests crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96516">Bug 96516</a> - [bisected: 482526] &quot;clover: Update OpenCL version string to match OpenGL&quot;: clover's build fails because of missing git_sha1.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96528">Bug 96528</a> - Location qualifier segfaults during shader compilation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96541">Bug 96541</a> - Tonga Unreal elemental bad rendering since radeonsi: Decompress DCC textures in a render feedback loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96565">Bug 96565</a> - Clive Barker's Jericho displays strange,vivid colors when motion blur enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96607">Bug 96607</a> - [bisected] texture misrender / flicker in The Talos Principle on SKL</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96617">Bug 96617</a> - gl_SecondaryFragDataEXT doesn't work for extended blend func</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96629">Bug 96629</a> - dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width &gt;= 1' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96639">Bug 96639</a> - st/mesa: transfer_map with too-high level with dEQP-GLES2.functional.texture.completeness.cube.extra_level</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96674">Bug 96674</a> - [SNB, ILK] spec.ext_image_dma_buf_import.ext_image_dma_buf_import-sample_nv1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96729">Bug 96729</a> - Wrong shader compilation error message</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96765">Bug 96765</a> - BindFragDataLocationIndexed on array fragment shader output.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96782">Bug 96782</a> - [regression bisected] R600 fp64 and glsl-4.00 piglit failures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96791">Bug 96791</a> - Cannot use image from swapchains for sampling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96825">Bug 96825</a> - anv_device.c:31:27: fatal error: anv_timestamp.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96878">Bug 96878</a> - [Bisected: cc2d0e6][HSW] &quot;GPU HANG&quot; msg after autologin to gnome-session</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96949">Bug 96949</a> - [regression] Piglit numSamples assertion failures with 9a23a177b90</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96950">Bug 96950</a> - Another regression from bc4e0c486: vbo: Use a bitmask to track the active arrays in vbo_exec*.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97019">Bug 97019</a> - [clover] build failure in llvm/codegen/native.cpp:129:52</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97032">Bug 97032</a> - [BDW,SKL] piglit.spec.arb_gpu_shader5.arb_gpu_shader5-interpolateatcentroid-flat</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97033">Bug 97033</a> - [BDW,SKL] piglit.spec.arb_gpu_shader_fp64.varying-packing.simple regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97083">Bug 97083</a> - [IVB,BYT] GPU hang on deqp-gles31.functional.separate.shader.random</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97140">Bug 97140</a> - dd_draw.c:949:11: error: implicit declaration of function 'fmemopen' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97267">Bug 97267</a> - [BDW] GL45-CTS.texture_cube_map_array.sampling asserts inside brw_fs.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97278">Bug 97278</a> - [vulkancts,HSW] all vulkancts tests assert on HSW</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97285">Bug 97285</a> - Darkness in Dota 2 after Patch &quot;Make Gallium's BlitFramebuffer follow the GL 4.4 sRGB rules&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97286">Bug 97286</a> - `make check` fails uniform-initializer-test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97305">Bug 97305</a> - Gallium: TBOs and images set the offset in elements, not bytes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97309">Bug 97309</a> - piglit.spec.glsl-1_30.compiler.switch-statement.switch-case-duplicated.vert regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97322">Bug 97322</a> - GenerateMipmap creates wrong mipmap for sRGB texture</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97413">Bug 97413</a> - BioShock Infinite crashes on startup with Mesa Git version, R7 370</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97448">Bug 97448</a> - [HSW] deqp-vk.api_.copy_and_blit.image_to_image_stencil regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97477">Bug 97477</a> - i915g: gl_FragCoord is always (0.0, max_y)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97513">Bug 97513</a> - clover reports wrong device pointer size</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97587">Bug 97587</a> - make check nir/tests/control_flow_tests regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97761">Bug 97761</a> - es2-cts.gtf.gl2extensiontests.egl_image_external.testsimpleunassociated crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97773">Bug 97773</a> - New Mesa master now results in warnings in glrender (and subsurfaces and simple-egl), black screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97790">Bug 97790</a> - Vulkan cts regressions due to 24be63066</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97804">Bug 97804</a> - Later precision statement isn't overriding earlier one</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97808">Bug 97808</a> - &quot;tgsi/scan: don't set interp flags for inputs only used by INTERP instructions&quot; causes glitches in wine with gallium nine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97894">Bug 97894</a> - Crash in u_transfer_unmap_vtbl when unmapping a buffer mapped in different context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97952">Bug 97952</a> - /usr/include/string.h:518:12: error: exception specification in declaration does not match previous declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97969">Bug 97969</a> - [radeonsi, bisected: fb827c0] Video decoding shows green artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97976">Bug 97976</a> - VCE regression BO to small for addr since winsys/amdgpu: enable buffer allocation from slabs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98005">Bug 98005</a> - VCE dual instance encoding inconsistent since st/va: enable dual instances encode by sync surface</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98128">Bug 98128</a> - nir/tests/control_flow_tests.cpp:79:73: error: ‘nir_loop_first_cf_node’ was not declared in this scope</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98131">Bug 98131</a> - Compiler should reject lowp/mediump qualifiers on atomic_uints</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98133">Bug 98133</a> - GetSynciv should raise an error if bufSize &lt; 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98135">Bug 98135</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.shader.transform_feedback_varyings wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98167">Bug 98167</a> - [vulkan, radv] missing libgcrypt and openssl devel results in linker error in libvulkan_common</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98172">Bug 98172</a> - Concurrent call to glClientWaitSync results in segfault in one of the waiters.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98244">Bug 98244</a> - dEQP: textureOffset(sampler2DArrayShadow, ...) should not exist.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98264">Bug 98264</a> - Build broken for i965 due to multiple deifnitions of intelFenceExtension</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98307">Bug 98307</a> - &quot;st/glsl_to_tgsi: explicitly track all input and output declaration&quot; broke flightgear colors on rs780</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98431">Bug 98431</a> - UnrealEngine v4 demos startup fails to blorp blit assert</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+Mesa no longer depends on libudev.

 </div>
 </body>
--- a/docs/relnotes/13.0.1.html
+++ b/docs/relnotes/13.0.1.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.1 Release Notes / November 14, 2016</h1>
+
+<p>
+Mesa 13.0.1 is a bug fix release which fixes bugs found since the 13.0.0 release.
+</p>
+<p>
+Mesa 13.0.1 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7cbb91dead05cde279ee95f86e8321c8e1c8fc9deb88f12e0f587672a10d88c5  mesa-13.0.1.tar.gz
+71962fb2bf77d33b0ad4a565b490dbbeaf4619099c6d9722f04a73187957a731  mesa-13.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97715">Bug 97715</a> - [ILK,G45,G965] piglit.spec.arb_separate_shader_objects.misc api error checks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98012">Bug 98012</a> - [IVB] Segfault when running Dolphin twice with Vulkan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98512">Bug 98512</a> - radeon r600 vdpau: Invalid command stream: texture bo too small</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
+  <li>glx/glvnd: Fix dispatch function names and indices</li>
+</ul>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>glx/windows: Add wgl.h to the sources list</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>android: avoid using libdrm with host modules</li>
+</ul>
+
+<p>Darren Salt (1):</p>
+<ul>
+  <li>radv/pipeline: Don't dereference NULL dynamic state pointers</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>radv: expose xlib platform extension</li>
+  <li>radv: fix dual source blending</li>
+  <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+  <li>radv: emit correct last export when Z/stencil export is enabled</li>
+  <li>ac/nir: add support for discard_if intrinsic (v2)</li>
+  <li>nir: add conditional discard optimisation (v4)</li>
+  <li>radv: enable conditional discard optimisation on radv.</li>
+  <li>radv: fix GetFenceStatus for signaled fences</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.0</li>
+  <li>amd/addrlib: limit fastcall/regparm to GCC i386</li>
+  <li>anv: use correct .specVersion for extensions</li>
+  <li>radv: use correct .specVersion for extensions</li>
+  <li>radv: Suffix the radeon_icd file with the host CPU</li>
+  <li>Update version to 13.0.1</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Use Newton-Raphson on the 1/W write to fix glmark2 terrain.</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().</li>
+</ul>
+
+<p>Fredrik Höglund (1):</p>
+<ul>
+  <li>radv: add support for anisotropic filtering on VI+</li>
+</ul>
+
+<p>Jason Ekstrand (21):</p>
+<ul>
+  <li>anv/device: Return DEVICE_LOST if execbuf2 fails</li>
+  <li>vulkan/wsi/x11: Better handle wsi_x11_connection_create failure</li>
+  <li>vulkan/wsi/x11: Clean up connections in finish_wsi</li>
+  <li>anv: Better handle return codes from anv_physical_device_init</li>
+  <li>intel/blorp: Use wm_prog_data instead of hand-rolling our own</li>
+  <li>intel/blorp: Pass a brw_stage_prog_data to upload_shader</li>
+  <li>anv/pipeline: Put actual pointers in anv_shader_bin</li>
+  <li>anv/pipeline: Properly cache prog_data::param</li>
+  <li>intel/blorp: Emit all the binding tables</li>
+  <li>anv/device: Add an execbuf wrapper</li>
+  <li>anv: Add a cmd_buffer_execbuf helper</li>
+  <li>anv: Don't presume to know what address is in a surface relocation</li>
+  <li>anv: Add a new bo_pool_init helper</li>
+  <li>anv/allocator: Simplify anv_scratch_pool</li>
+  <li>anv: Initialize anv_bo::offset to -1</li>
+  <li>anv/batch_chain: Improve write_reloc</li>
+  <li>anv: Add an anv_execbuf helper struct</li>
+  <li>anv/batch: Move last_ss_pool_bo_offset to the command buffer</li>
+  <li>anv: Move relocation handling from EndCommandBuffer to QueueSubmit</li>
+  <li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
+  <li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>glsl: Update deref types when resizing implicitly sized arrays.</li>
+  <li>mesa: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>anv: Do relocations in userspace before execbuf ioctl</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>egl: use util/macros.h</li>
+  <li>egl: make interop ABI visible again</li>
+  <li>glx: make interop ABI visible again</li>
+  <li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix BFE/BFI lowering for GLSL semantics</li>
+  <li>glsl: fix lowering of UBO references of named blocks</li>
+  <li>st/glsl_to_tgsi: fix dvec[34] loads from SSBO</li>
+  <li>st/mesa: fix the layer of VDPAU surface samplers</li>
+</ul>
+
+<p>Steven Toth (3):</p>
+<ul>
+  <li>gallium/hud: fix a problem where objects are free'd while in use.</li>
+  <li>gallium/hud: close a previously opened handle</li>
+  <li>gallium/hud: protect against an initialization race</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>mesa/glsl: delete previously linked shaders earlier when linking</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.2.html
+++ b/docs/relnotes/13.0.2.html
@@ -0,0 +1,189 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.2 Release Notes / November 28, 2016</h1>
+
+<p>
+Mesa 13.0.2 is a bug fix release which fixes bugs found since the 13.0.1 release.
+</p>
+<p>
+Mesa 13.0.2 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6014233a5db6032ab8de4881384871bbe029de684502707794ce7b3e6beec308  mesa-13.0.2.tar.gz
+a6ed622645f4ed61da418bf65adde5bcc4bb79023c36ba7d6b45b389da4416d5  mesa-13.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97321">Bug 97321</a> - Query INFO_LOG_LENGTH for empty info log should return 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97420">Bug 97420</a> - &quot;#version 0&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98632">Bug 98632</a> - Fix build on Hurd without PATH_MAX</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (3):</p>
+<ul>
+  <li>i965: Add some APL and KBL SKU strings</li>
+  <li>i965: Reorder PCI ID list to match release order</li>
+  <li>i965/glk: Add basic Geminilake support</li>
+</ul>
+
+<p>Dave Airlie (14):</p>
+<ul>
+  <li>radv: fix texturesamples to handle single sample case</li>
+  <li>wsi: fix VK_INCOMPLETE for vkGetSwapchainImagesKHR</li>
+  <li>radv: don't crash on null swapchain destroy.</li>
+  <li>ac/nir/llvm: fix channel in texture gather lowering code.</li>
+  <li>radv: make sure to flush input attachments correctly.</li>
+  <li>radv: fix image view creation for depth and stencil only</li>
+  <li>radv: spir-v allows texture size query with and without lod.</li>
+  <li>vulkan/wsi/x11: handle timeouts properly in next image acquire (v1.1)</li>
+  <li>vulkan/wsi: store present mode in swapchain base class</li>
+  <li>vulkan/wsi/x11: add support for IMMEDIATE present mode</li>
+  <li>radv: fix texel fetch offset with 2d arrays.</li>
+  <li>radv/si: fix optimal micro tile selection</li>
+  <li>radv/ac/llvm: shadow samplers only return one value.</li>
+  <li>radv: fix 3D clears with baseMiplevel</li>
+</ul>
+
+<p>Eduardo Lima Mitev (2):</p>
+<ul>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfaceFormatsKHR</li>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfacePresentModesKHR</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.1</li>
+  <li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
+  <li>anv: fix enumeration of properties</li>
+  <li>radv: honour the number of properties available</li>
+  <li>Update version to 13.0.2</li>
+</ul>
+
+<p>Eric Anholt (3):</p>
+<ul>
+  <li>vc4: Don't abort when a shader compile fails.</li>
+  <li>vc4: Clamp the shadow comparison value.</li>
+  <li>vc4: Fix register class handling of DDX/DDY arguments.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (2):</p>
+<ul>
+  <li>util/disk_cache: close a previously opened handle in disk_cache_put (v2)</li>
+  <li>anv: Fix unintentional integer overflow in anv_CreateDmaBufImageINTEL</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>anv/format: handle unsupported formats properly</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>glcpp: Handle '#version 0' and other invalid values</li>
+  <li>glsl: Parse 0 as a preprocessor INTCONSTANT</li>
+</ul>
+
+<p>Jason Ekstrand (15):</p>
+<ul>
+  <li>anv/gen8: Stall when needed in Cmd(Set|Reset)Event</li>
+  <li>anv/wsi: Set the fence to signaled in AcquireNextImageKHR</li>
+  <li>anv: Rework fences</li>
+  <li>vulkan/wsi/wayland: Include pthread.h</li>
+  <li>vulkan/wsi/wayland: Clean up some error handling paths</li>
+  <li>vulkan/wsi: Report the correct min/maxImageCount</li>
+  <li>i965/gs: Allow primitive id to be a system value</li>
+  <li>anv: Handle null in all destructors</li>
+  <li>anv/fence: Handle ANV_FENCE_CREATE_SIGNALED_BIT</li>
+  <li>nir/spirv: Fix handling of gl_PrimitiveId</li>
+  <li>anv/blorp: Ignore clears for attachments first used as resolve destinations</li>
+  <li>anv: Implement a depth stall restriction on gen7</li>
+  <li>anv/cmd_buffer: Handle running out of binding tables in compute shaders</li>
+  <li>anv/cmd_buffer: Emit a CS stall before setting a CS pipeline</li>
+  <li>vulkan/wsi/x11: Implement FIFO mode.</li>
+</ul>
+
+<p>Jordan Justen (2):</p>
+<ul>
+  <li>isl: Fix height calculation in isl_msaa_interleaved_scale_px_to_sa</li>
+  <li>i965/hsw: Set integer mode in sampling state for stencil texturing</li>
+</ul>
+
+<p>Kenneth Graunke (4):</p>
+<ul>
+  <li>intel: Set min_ds_entries on Broxton.</li>
+  <li>i965: Fix compute shader crash.</li>
+  <li>mesa: Drop PATH_MAX usage.</li>
+  <li>i965: Fix GS push inputs with enhanced layouts.</li>
+</ul>
+
+<p>Kevin Strasser (1):</p>
+<ul>
+  <li>vulkan/wsi: Add a thread-safe queue implementation</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix multi level clears with VK_REMAINING_MIP_LEVELS</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>gbm: request correct version of the DRI2_FENCE extension</li>
+</ul>
+
+<p>Nicolai Hähnle (2):</p>
+<ul>
+  <li>radeonsi: store group_size_variable in struct si_compute</li>
+  <li>glsl/lower_output_reads: fix geometry shader output handling with conditional emit</li>
+</ul>
+
+<p>Steinar H. Gunderson (1):</p>
+<ul>
+  <li>Fix races during _mesa_HashWalk().</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: fix empty program log length</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.3.html
+++ b/docs/relnotes/13.0.3.html
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.3 Release Notes / January 5, 2017</h1>
+
+<p>
+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release.
+</p>
+<p>
+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+55b07d056f9b855ba9d7c8b2ddc7d3b220a61c6ab1bdc73cbfc2f607721094c2  mesa-13.0.3.tar.gz
+d9aa8be5c176d00d0cd503cb2f64a5a403ea471ec819c022581414860d7ba40e  mesa-13.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+  <li>egl: Fix crashes in eglCreate*Surface()</li>
+</ul>
+
+<p>Dave Airlie (4):</p>
+<ul>
+  <li>anv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: fix another regression since shadow fixes.</li>
+  <li>radv: add missing license file to radv_meta_bufimage.</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.2</li>
+  <li>anv: don't double-close the same fd</li>
+  <li>anv: don't leak memory if anv_init_wsi() fails</li>
+  <li>radv: don't leak the fd if radv_physical_device_init() succeeds</li>
+  <li>Update version to 13.0.3</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: In a loop break/continue, jump if everyone has taken the path.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (3):</p>
+<ul>
+  <li>anv: Add missing error-checking to anv_block_pool_init (v2)</li>
+  <li>anv: Update the teardown in reverse order of the anv_CreateDevice</li>
+  <li>vulkan/wsi: Fix resource leak in success path of wsi_queue_init()</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>compiler/glsl: fix precision problem of tanh</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>mesa: only verify that enabled arrays have backing buffers</li>
+</ul>
+
+<p>Jason Ekstrand (8):</p>
+<ul>
+  <li>anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty</li>
+  <li>anv/image: Rename hiz_surface to aux_surface</li>
+  <li>anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation</li>
+  <li>genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode</li>
+  <li>anv/device: Return the right error for failed maps</li>
+  <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+  <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+  <li>spirv: Use a simpler and more correct implementation of tanh()</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Allocate at least some URB space even when max_vertices = 0.</li>
+</ul>
+
+<p>Marek Olšák (17):</p>
+<ul>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: consolidate max-work-group-size computation</li>
+  <li>radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips</li>
+  <li>radeonsi: apply a TC L1 write corruption workaround for SI</li>
+  <li>radeonsi: apply a tessellation bug workaround for SI</li>
+  <li>radeonsi: add a tess+GS hang workaround for VI dGPUs</li>
+  <li>radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well</li>
+  <li>cso: don't release sampler states that are bound</li>
+  <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+  <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+  <li>radeonsi: allow specifying simm16 of emit_waitcnt at call sites</li>
+  <li>radeonsi: wait for outstanding memory instructions in TCS barriers</li>
+  <li>tgsi: fix the src type of TGSI_OPCODE_MEMBAR</li>
+  <li>radeonsi: wait for outstanding LDS instructions in memory barriers if needed</li>
+  <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+  <li>i965/fs: Add unit tests for copy propagation pass.</li>
+  <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix isolines tess factor writes to control ring</li>
+  <li>radeonsi: update all GSVS ring descriptors for new buffer allocations</li>
+  <li>radeonsi: do not kill GS with memory writes</li>
+  <li>radeonsi: fix an off-by-one error in the bounds check for max_vertices</li>
+</ul>
+
+<p>Rhys Kidd (1):</p>
+<ul>
+  <li>glsl: Add pthread libs to cache_test</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>mesa: fix active subroutine uniforms properly</li>
+  <li>Revert "nir: Turn imov/fmov of undef into undef."</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.4.html
+++ b/docs/relnotes/13.0.4.html
@@ -0,0 +1,255 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.4 Release Notes / February 1, 2017</h1>
+
+<p>
+Mesa 13.0.4 is a bug fix release which fixes bugs found since the 13.0.3 release.
+</p>
+<p>
+Mesa 13.0.4 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+a78518030b0b7d77a6c426ac3ff40f4b27fb0e2cdb0dfbe685024a46cae59bad  mesa-13.0.4.tar.gz
+a95d7ce8f7bd5f88585e4be3144a341236d8c0fc91f6feaec59bb8ba3120e726  mesa-13.0.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92634">Bug 92634</a> - gallium's vl_mpeg12_decoder does not work with st/va</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94512">Bug 94512</a> - X segfaults with glx-tls enabled in a x32 environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94900">Bug 94900</a> - HD6950 GPU lockup loop with various steam games (octodad[always], saints row 4[always], dead island[always], grid autosport[sometimes])</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with &quot;Fatal error: Cannot set display mode.&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98914">Bug 98914</a> - mesa-vdpau-drivers: breaks vdpau for mpeg2video</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99085">Bug 99085</a> - [EGL] dEQP-EGL.functional.sharing.gles2.multithread intermittent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99097">Bug 99097</a> - [vulkancts] dEQP-VK.image.store regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99100">Bug 99100</a> - [SKL,BDW,BSW,KBL] dEQP-VK.glsl.return.return_in_dynamic_loop_dynamic_vertex regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99144">Bug 99144</a> - Incorrect rendering using glDrawArraysInstancedBaseInstance and first != 0 on Skylake</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99154">Bug 99154</a> - Link time error when using multiple builtin functions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99158">Bug 99158</a> - vdpau segfaults and gpu locks with kodi on R9285</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99185">Bug 99185</a> - dEQP-EGL.functional.image.modify.tex_rgb5_a1_tex_subimage_rgba8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99188">Bug 99188</a> - dEQP-EGL.functional.create_context_ext.robust_gl_30.rgb565_no_depth_no_stencil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99210">Bug 99210</a> - ES3-CTS.functional.texture.mipmap.cube.generate.rgba5551_*</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] &quot;Assertion `bkref' failed&quot; reproducible with glmark2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Rodriguez (2):</p>
+<ul>
+  <li>vulkan/wsi: clarify the severity of lack of DRI3 v2</li>
+  <li>radv: fix include order for installed headers v2</li>
+</ul>
+
+<p>Arda Coskunses (2):</p>
+<ul>
+  <li>vulkan/wsi/x11: don't crash on null visual</li>
+  <li>vulkan/wsi/x11: don't crash on null wsi x11 connection</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Support loader interface version 3.</li>
+</ul>
+
+<p>Chad Versace (10):</p>
+<ul>
+  <li>egl: Check config's surface types in eglCreate*Surface()</li>
+  <li>dri: Add __DRI_IMAGE_FORMAT_ARGB1555</li>
+  <li>mesa/texformat: Handle GL_RGBA + GL_UNSIGNED_SHORT_5_5_5_1</li>
+  <li>egl: Emit correct error when robust context creation fails</li>
+  <li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
+  <li>mesa/shaderobj: Fix races on refcounts</li>
+  <li>meta: Disable dithering during glGenerateMipmap</li>
+  <li>vulkan: Add new cast macros for VkIcd types</li>
+  <li>vulkan: Update vk_icd.h to interface version 3</li>
+  <li>anv: Support loader interface version 3 (patch v2)</li>
+</ul>
+
+<p>Christian König (1):</p>
+<ul>
+  <li>vl/zscan: fix "Fix trivial sign compare warnings"</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>glx: Add missing glproto dependency for gallium-xlib glx</li>
+</ul>
+
+<p>Damien Grassart (1):</p>
+<ul>
+  <li>anv: return count of queue families written</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>radv: flush smem for uniform buffer bit.</li>
+</ul>
+
+<p>Emil Velikov (10):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.3</li>
+  <li>cherry-ignore: add couple of intel_miptree_copy related patches</li>
+  <li>cherry-ignore: add "radv: Call nir_lower_constant_initializers."</li>
+  <li>get-typod-pick-list.sh: add new script</li>
+  <li>cherry-ignore: add "_mesa_ClampColor extension/version fix"</li>
+  <li>cherry-ignore: add wayland race condition fix</li>
+  <li>egl/wayland: use the destroy_window_callback for swrast</li>
+  <li>automake: use shared llvm libs for make distcheck</li>
+  <li>get-pick-list.sh: Require explicit "13.0" for nominating stable patches</li>
+  <li>Update version to 13.0.4</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>anv: Fix uniform and storage buffer offset alignment limits.</li>
+</ul>
+
+<p>Fredrik Höglund (2):</p>
+<ul>
+  <li>radv: fix dual source blending</li>
+  <li>dri3: Fix MakeCurrent without a default framebuffer</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>mapi: update the asm code to support x32</li>
+</ul>
+
+<p>Heiko Przybyl (1):</p>
+<ul>
+  <li>r600/sb: Fix loop optimization related hangs on eg</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nouveau: take extra push space into account for pushbuf_space calls</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>i965/generator/tex: Handle an immediate sampler with an indirect texture</li>
+  <li>anv/formats: Use the real format for B4G4R4A4_UNORM_PACK16 on gen8</li>
+  <li>nir/search: Only allow matching SSA values</li>
+  <li>isl: Mark A4B4G4R4_UNORM as supported on gen8</li>
+</ul>
+
+<p>Jonas Ådahl (1):</p>
+<ul>
+  <li>egl/wayland: Cleanup private display connection when init fails</li>
+</ul>
+
+<p>Kenneth Graunke (7):</p>
+<ul>
+  <li>i965: Don't bail on vertex element processing if we need draw params.</li>
+  <li>i965: Fix last slot calculations</li>
+  <li>i965: Fix texturing in the vec4 TCS and GS backends.</li>
+  <li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
+  <li>i965: Make BLORP disable the NP Z PMA stall fix.</li>
+  <li>glsl: Use ir_var_temporary when generating inline functions.</li>
+  <li>i965: Properly flush in hsw_pause_transform_feedback().</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>vdpau: call texture_get_handle while the mutex is being held</li>
+  <li>va: call texture_get_handle while the mutex is being held</li>
+  <li>radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+</li>
+  <li>radeonsi: don't forget to add HTILE to the buffer list for texturing</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
+</ul>
+
+<p>Nanley Chery (3):</p>
+<ul>
+  <li>anv/cmd_buffer: Fix arrayed depth/stencil attachments</li>
+  <li>anv/cmd_buffer: Fix programmed HiZ qpitch</li>
+  <li>anv/image: Disable HiZ for depth buffer arrays</li>
+</ul>
+
+<p>Nayan Deshmukh (1):</p>
+<ul>
+  <li>st/va: delay calling begin_frame until we have all parameters</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: some fence cleanup</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>gallium/hud: add missing break in hud_cpufreq_graph_install()</li>
+</ul>
+
+<p>Timothy Arceri (3):</p>
+<ul>
+  <li>nir: Turn imov/fmov of undef into undef</li>
+  <li>glsl: fix opt_minmax redundancy checks against baserange</li>
+  <li>util: fix list_is_singular()</li>
+</ul>
+
+<p>Zachary Michaels (1):</p>
+<ul>
+  <li>radeonsi: Always leave poly_offset in a valid state</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.5.html
+++ b/docs/relnotes/13.0.5.html
@@ -0,0 +1,210 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.5 Release Notes / February 20, 2017</h1>
+
+<p>
+Mesa 13.0.5 is a bug fix release which fixes bugs found since the 13.0.4 release.
+</p>
+<p>
+Mesa 13.0.5 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7e45e3812078726eabca6d9384364bf035a3c4279024ec9090dd1b19a8989926  mesa-13.0.5.tar.gz
+bfcea7e2c801525a60895c8aff11aa68457ee9aa35d01a4638e1f310a3f5ef87  mesa-13.0.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98421">Bug 98421</a> - src/loader/loader.c:111:40: error: unknown type name ‘drmDevicePtr’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98526">Bug 98526</a> - glsl/tests/general-ir-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: ‘const struct API_STATE’ has no member named ‘linkageCount’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Bartosz Tomczyk (2):</p>
+<ul>
+  <li>r600: Fix stack overflow</li>
+  <li>r600/sb: Fix memory leak</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>swr: [rasterizer core] Remove dead code Clipper::ClipScalar()</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>i965/mt: Disable HiZ when sharing depth buffer externally (v2)</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>radv: change base aligmment for allocated memory.</li>
+  <li>radv: fix cik macroModeIndex.</li>
+  <li>radv: adopt some init config workarounds from radeonsi.</li>
+</ul>
+
+<p>Derek Foreman (1):</p>
+<ul>
+  <li>egl/dri2: add image_loader_extension back into loader extensions for wayland</li>
+</ul>
+
+<p>Emil Velikov (26):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.4</li>
+  <li>configure.ac: list radeon in --with-vulkan-drivers help string</li>
+  <li>i965: automake: correctly set MKDIR_GEN</li>
+  <li>freedreno: automake: correctly set MKDIR_GEN</li>
+  <li>i965: automake: include builddir prior to srcdir</li>
+  <li>i915: automake: include builddir prior to srcdir</li>
+  <li>egl: automake: include builddir prior to srcdir</li>
+  <li>clover: automake: include builddir prior to srcdir</li>
+  <li>st/dri: automake: include builddir prior to srcdir</li>
+  <li>d3dadapter9: automake: include builddir prior to srcdir</li>
+  <li>glx: automake: include builddir prior to srcdir</li>
+  <li>glx/apple: automake: include builddir prior to srcdir</li>
+  <li>glx/windows: automake: include builddir prior to srcdir</li>
+  <li>loader: automake: include builddir prior to srcdir</li>
+  <li>mapi: automake: include builddir prior to srcdir</li>
+  <li>radeon, r200: automake: include builddir prior to srcdir</li>
+  <li>dri/swrast: automake: include builddir prior to srcdir</li>
+  <li>dri/osmesa: automake: include builddir prior to srcdir</li>
+  <li>mesa/tests: automake: include builddir prior to srcdir</li>
+  <li>bin/get-extra-pick-list: use git merge-base to get the branchpoint</li>
+  <li>bin/get-extra-pick-list: rework to use already_picked list</li>
+  <li>bin/get-typod-pick-list.sh: limit `git grep ...' to only as needed</li>
+  <li>bin/get-pick-list.sh: limit `git grep ...' only as needed</li>
+  <li>bin/get-pick-list.sh: remove ancient way of nominating patches</li>
+  <li>bin/get-fixes-pick-list.sh: add new script</li>
+  <li>Update version to 13.0.5</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Avoid emitting small immediates for UBO indirect load address guards.</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>glx/glvnd: Fix GLXdispatchIndex sorting</li>
+</ul>
+
+<p>Ian Romanick (11):</p>
+<ul>
+  <li>linker: Slight code rearrange to prevent duplication in the next commit</li>
+  <li>linker: Accurately track gl_uniform_block::stageref</li>
+  <li>glsl: Split process_block_array into two functions</li>
+  <li>glsl: Fix wonkey indentation left from previous commit</li>
+  <li>glsl: Track the linearized array index for each UBO instance array element</li>
+  <li>glsl: Use simpler visitor to determine which UBO and SSBO blocks are used</li>
+  <li>glsl: Add tracking for elements of an array-of-arrays that have been accessed</li>
+  <li>glsl: Add structures to track accessed elements of a single array</li>
+  <li>glsl: Mark a set of array elements as accessed using a list of array_deref_range</li>
+  <li>glsl: Walk a list of ir_dereference_array to mark array elements as accessed</li>
+  <li>linker: Accurately mark a uniform block instance array element as used in a stage</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>vbo: process buffer binding state changes on draw when recording</li>
+  <li>st/mesa: MAX_VARYING is the max supported number of patch varyings, not min</li>
+  <li>nvc0: disable linked tsc mode in compute launch descriptor</li>
+</ul>
+
+<p>Jason Ekstrand (11):</p>
+<ul>
+  <li>nir/search: Use the correct bit size for integer comparisons</li>
+  <li>i965/blorp: Use the correct ISL format for combined depth/stencil</li>
+  <li>intel/blorp: Handle clearing of A4B4G4R4 on all platforms</li>
+  <li>isl/formats: Only advertise sampling for A4B4G4R4 on Broadwell</li>
+  <li>anv: Flush render cache before STATE_BASE_ADDRESS on gen7</li>
+  <li>anv: Improve flushing around STATE_BASE_ADDRESS</li>
+  <li>vulkan/wsi/wayland: Handle VK_INCOMPLETE for GetFormats</li>
+  <li>vulkan/wsi/wayland: Handle VK_INCOMPLETE for GetPresentModes</li>
+  <li>vulkan/wsi: Lower the maximum image sizes</li>
+  <li>i965/sampler_state: Pass texObj into update_sampler_state</li>
+  <li>i965/sampler_state: Set the "Base Mip Level" field on Sandy Bridge</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Unbind deleted shaders from brw_context, fixing malloc heisenbug.</li>
+</ul>
+
+<p>Lionel Landwerlin (5):</p>
+<ul>
+  <li>anv: don't require render target isl bit for depth/stencil surfaces</li>
+  <li>anv: set command buffer to NULL when allocations fail</li>
+  <li>anv: fix descriptor pool internal size allocation</li>
+  <li>spirv: handle OpUndef as part of the variable parsing pass</li>
+  <li>spirv: handle undefined components for OpVectorShuffle</li>
+</ul>
+
+<p>Marc-André Lureau (1):</p>
+<ul>
+  <li>tgsi-dump: dump label if instruction has one</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>radeonsi: always set the TCL1_ACTION_ENA when invalidating L2</li>
+  <li>gallium/radeon: fix performance of buffer readbacks</li>
+</ul>
+
+<p>Topi Pohjolainen (2):</p>
+<ul>
+  <li>i965: Make depth clear flushing more explicit</li>
+  <li>i965/gen6: Issue direct depth stall and flush after depth clear</li>
+</ul>
+
+<p>Vinson Lee (2):</p>
+<ul>
+  <li>scons: Require libdrm &gt;= 2.4.66 for DRM.</li>
+  <li>util: Fix Clang trivial destructor check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.6.html
+++ b/docs/relnotes/13.0.6.html
@@ -0,0 +1,287 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.6 Release Notes / March 20, 2017</h1>
+
+<p>
+Mesa 13.0.6 is a bug fix release which fixes bugs found since the 13.0.5 release.
+</p>
+<p>
+Mesa 13.0.6 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1076590f29103f022a2cd87e6dff6ae77072013745603d06b0410c373ab2bb1a  mesa-13.0.6.tar.gz
+29ef104a7fc082d352b1599bd6cb1d040be424ccd22f5e0eb7ee9b0e9acd3597  mesa-13.0.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68504">Bug 68504</a> - 9.2-rc1 workaround for clover build failure on ppc/altivec: cannot convert 'bool' to '__vector(4) __bool int' in return</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97102">Bug 97102</a> - [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98869">Bug 98869</a> - Electronic Super Joy graphic artefacts (regression,bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99401">Bug 99401</a> - [g33] regression: piglit.spec.!opengl 1_0.gl-1_0-beginend-coverage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99456">Bug 99456</a> - Firefox crashing when opening about:support with WebGL2 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99677">Bug 99677</a> - heap-use-after-free in glsl</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99715">Bug 99715</a> - Don't print: &quot;Note: Buggy applications may crash, if they do please report to vendor&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99850">Bug 99850</a> - Tessellation bug on Carrizo</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100049">Bug 100049</a> - &quot;ralloc: Make sure ralloc() allocations match malloc()'s alignment.&quot; causes seg fault in 32bit build</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Alex Smith (2):</p>
+<ul>
+  <li>radv: Emit pending flushes before executing a secondary command buffer</li>
+  <li>radv: Flush before copying with PKT3_WRITE_DATA in CmdUpdateBuffer</li>
+</ul>
+
+<p>Bartosz Tomczyk (1):</p>
+<ul>
+  <li>glsl: fix heap-buffer-overflow</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (8):</p>
+<ul>
+  <li>radv: Pass CMASK alignment to application.</li>
+  <li>radv: Pass DCC alignment to application.</li>
+  <li>radv: Never try to create more than max_sets descriptor sets.</li>
+  <li>radv: Reset emitted compute pipeline when calling secondary cmd buffer.</li>
+  <li>radv: Only use PKT3_OCCLUSION_QUERY when it doesn't hang.</li>
+  <li>radv: Use correct size for availability flag.</li>
+  <li>radv: Disable HTILE for textures with multiple layers/levels.</li>
+  <li>radv: Emit cache flushes before CP DMA.</li>
+</ul>
+
+<p>Ben Crocker (3):</p>
+<ul>
+  <li>gallivm: Improve debug output (V2)</li>
+  <li>gallivm: Override getHostCPUName() "generic" w/ "pwr8" (v4)</li>
+  <li>gallivm: Reenable PPC VSX (v3)</li>
+</ul>
+
+<p>Brendan King (1):</p>
+<ul>
+  <li>egl/dri3: implement query surface hook</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>swr: Prune empty nodes in CalculateProcessorTopology.</li>
+</ul>
+
+<p>Connor Abbott (1):</p>
+<ul>
+  <li>anv: fix Get*MemoryRequirements for !LLC</li>
+</ul>
+
+<p>Dave Airlie (13):</p>
+<ul>
+  <li>radv: program a default point size.</li>
+  <li>radv: handle transfer_write as a dst flag.</li>
+  <li>radv/ac: handle nir irem opcode.</li>
+  <li>radv/ac: implement txs for buffer textures.</li>
+  <li>radv/ac: correctly size shared memory usage.</li>
+  <li>radv/ac: avoid the fmask path when doing txs.</li>
+  <li>radv: pass FMASK alignment to application</li>
+  <li>tgsi: fix memory leak in tgsi sanity check</li>
+  <li>radv: fix depth format in blit2d.</li>
+  <li>radv: fix txs for sampler buffers</li>
+  <li>radv: drop Z24 support.</li>
+  <li>radv: disable mip point pre clamping.</li>
+  <li>radv: setup llvm target data layout</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.5</li>
+  <li>Revert "get-pick-list.sh: Require explicit "13.0" for nominating stable patches"</li>
+  <li>cherry-ignore: don't pick nir_op_pack_double optimisation fix</li>
+  <li>i965: move brw_define.h ifndef guard to the top</li>
+  <li>cherry-ignore: add ANV fast clears related fixes</li>
+  <li>Update version to 13.0.6</li>
+</ul>
+
+<p>Fredrik Höglund (2):</p>
+<ul>
+  <li>radv: fix the dynamic buffer index in vkCmdBindDescriptorSets</li>
+  <li>radv/ac: fix multiple descriptor sets with dynamic buffers</li>
+</ul>
+
+<p>George Kyriazis (1):</p>
+<ul>
+  <li>swr: Align query results allocation</li>
+</ul>
+
+<p>Grazvydas Ignotas (3):</p>
+<ul>
+  <li>r300g: only allow byteswapped formats on big endian</li>
+  <li>gallium/u_queue: fix a crash with atexit handlers</li>
+  <li>gallium/u_queue: set num_threads correctly if not all threads start</li>
+</ul>
+
+<p>Gregory Hainaut (1):</p>
+<ul>
+  <li>glapi: fix typo in count_scale</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>mesa: Don't advertise GL_OES_read_format in core profile</li>
+</ul>
+
+<p>Ilia Mirkin (8):</p>
+<ul>
+  <li>nvc0: increase number of ubo binding points</li>
+  <li>nvc0/ir: fix robustness guarantees for constbuf loads on kepler+ compute</li>
+  <li>nvc0/ir: fix ubo max clamp, reset file index</li>
+  <li>gm107/ir: fix address offset bitfield for ATOMS</li>
+  <li>nvc0: set the render condition in the compute object</li>
+  <li>st/mesa: don't pass compare mode for stencil-sampled textures</li>
+  <li>nvc0: take extra pushbuf space into account for pushbuf_space calls</li>
+  <li>nvc0: increase alignment to 256 for texture buffers on fermi</li>
+</ul>
+
+<p>Jacob Lifshay (1):</p>
+<ul>
+  <li>vulkan/wsi: Improve the DRI3 error message</li>
+</ul>
+
+<p>Jason Ekstrand (11):</p>
+<ul>
+  <li>i965: Use a better guardband calculation.</li>
+  <li>intel/blorp: Swizzle clear colors on the CPU</li>
+  <li>i965/fs: Remove the inline pack_double_2x32 optimization</li>
+  <li>anv: Add an invalidate_range helper</li>
+  <li>anv/query: clflush the bo map on non-LLC platforms</li>
+  <li>genxml: Make MI_STORE_DATA_IMM more consistent</li>
+  <li>anv/query: Perform CmdResetQueryPool on the GPU</li>
+  <li>blorp/exec: Use uint32_t for copying varying data</li>
+  <li>intel/blorp: Explicitly flush all allocated state</li>
+  <li>anv: Accurately advertise dynamic descriptor limits</li>
+  <li>anv: Properly handle destroying NULL devices and instances</li>
+</ul>
+
+<p>Jonas Pfeil (1):</p>
+<ul>
+  <li>ralloc: Make sure ralloc() allocations match malloc()'s alignment.</li>
+</ul>
+
+<p>Jose Maria Casanova Crespo (1):</p>
+<ul>
+  <li>glsl: non-last member unsized array on SSBO must fail compilation on GLSL ES 3.1</li>
+</ul>
+
+<p>Kenneth Graunke (7):</p>
+<ul>
+  <li>i965: Fix fast depth clears for surfaces with a dimension of 16384.</li>
+  <li>i965: Use a UW source type for CS_OPCODE_CS_TERMINATE.</li>
+  <li>i965: Fix check for negative pitch in can_do_fast_copy_blit().</li>
+  <li>i965: Support the force_glsl_version driconf option.</li>
+  <li>i965: Combine the Gen6 SF and Clip viewport atoms.</li>
+  <li>mesa: Do (TCS &amp;&amp; !TES) draw time validation in ES as well.</li>
+  <li>egl: Ensure ResetNotificationStrategy matches for shared contexts.</li>
+</ul>
+
+<p>Lionel Landwerlin (3):</p>
+<ul>
+  <li>spirv: don't assert with location decorations on non i/o variables</li>
+  <li>anv: wsi: report presentation error per image request</li>
+  <li>i965/fs: fix uninitialized memory access</li>
+</ul>
+
+<p>Marc Di Luzio (1):</p>
+<ul>
+  <li>glsl: correct compute shader checks for memoryBarrier functions</li>
+</ul>
+
+<p>Marek Olšák (10):</p>
+<ul>
+  <li>st/mesa: destroy pipe_context before destroying st_context (v2)</li>
+  <li>radeonsi: don't invoke DCC decompression in update_all_texture_descriptors</li>
+  <li>radeonsi: fix UNSIGNED_BYTE index buffer fallback with non-zero start (v2)</li>
+  <li>gallium/util: remove unused u_index_modify helpers</li>
+  <li>gallium/u_index_modify: don't add PIPE_TRANSFER_UNSYNCHRONIZED unconditionally</li>
+  <li>gallium/u_queue: fix random crashes when the app calls exit()</li>
+  <li>st/mesa: reset sample_mask, min_sample, and render_condition for PBO ops</li>
+  <li>st/mesa: set blend state for PBO readbacks</li>
+  <li>radeonsi: fix broken tessellation on Carrizo and Stoney</li>
+  <li>radeonsi: mark all bound shader buffer ranges as initialized</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>clover: Work around build failure with AltiVec.</li>
+</ul>
+
+<p>Nicolai Hähnle (12):</p>
+<ul>
+  <li>mesa/main: fix meta caller of _mesa_ClampColor</li>
+  <li>radeonsi: fix texture gather on stencil textures</li>
+  <li>glsl: split DIV_TO_MUL_RCP into single- and double-precision flags</li>
+  <li>glx/dri3: handle NULL pointers in loader-to-DRI3 drawable conversion</li>
+  <li>glx/dri3: guard in_current_context against a disappeared drawable</li>
+  <li>glx: guard swap-interval functions against destroyed drawables</li>
+  <li>dri/common: clear the loaderPrivate pointer in driDestroyDrawable</li>
+  <li>winsys/amdgpu: reduce max_alloc_size based on GTT limits</li>
+  <li>radeonsi: handle MultiDrawIndirect in si_get_draw_start_count</li>
+  <li>radeonsi: fix UINT/SINT clamping for 10-bit formats on &lt;= CIK</li>
+  <li>st/glsl_to_tgsi: avoid iterating past the head of the instruction list</li>
+  <li>st/mesa: inform the driver of framebuffer changes before compute dispatches</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (6):</p>
+<ul>
+  <li>glsl: fix heap-use-after-free in ast_declarator_list::hir()</li>
+  <li>i965/fs: mark last DF uniform array element as 64 bit live one</li>
+  <li>i965/fs: detect different bit size accesses to uniforms to push them in proper locations</li>
+  <li>i965/fs: fix indirect load DF uniforms on BSW/BXT</li>
+  <li>i965/fs: fix source type when emitting MOV_INDIRECT to read ICP handles</li>
+  <li>i965/fs: emit MOV_INDIRECT with the source with the right register type</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>winsys/amdgpu: avoid potential segfault in amdgpu_bo_map()</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1121,6 +1121,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FORMAT_XRGB2101010  0x1009
 #define __DRI_IMAGE_FORMAT_ARGB2101010  0x100a
 #define __DRI_IMAGE_FORMAT_SARGB8       0x100b
+#define __DRI_IMAGE_FORMAT_ARGB1555     0x100c

 #define __DRI_IMAGE_USE_SHARE		0x0001
 #define __DRI_IMAGE_USE_SCANOUT		0x0002
@@ -1148,6 +1149,7 @@ struct __DRIdri2ExtensionRec {

 #define __DRI_IMAGE_FOURCC_R8		0x20203852
 #define __DRI_IMAGE_FOURCC_GR88		0x38385247
+#define __DRI_IMAGE_FOURCC_ARGB1555	0x35315241
 #define __DRI_IMAGE_FOURCC_RGB565	0x36314752
 #define __DRI_IMAGE_FOURCC_ARGB8888	0x34325241
 #define __DRI_IMAGE_FOURCC_XRGB8888	0x34325258
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,6 +109,10 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
+CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
+CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
+CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
@@ -134,6 +138,11 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
+CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics 505 (Broxton)")
+CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)")
 CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
@@ -144,22 +153,15 @@ CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kabylake GT2)")
 CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kabylake GT2)")
 CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
 CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
-CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
-CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x3184, glk,     "Intel(R) HD Graphics (Geminilake)")
+CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
--- a/include/vulkan/vk_icd.h
+++ b/include/vulkan/vk_icd.h
@@ -1,28 +1,56 @@
+//
+// File: vk_icd.h
+//
+/*
+ * Copyright (c) 2015-2016 The Khronos Group Inc.
+ * Copyright (c) 2015-2016 Valve Corporation
+ * Copyright (c) 2015-2016 LunarG, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
 #ifndef VKICD_H
 #define VKICD_H

-#include "vk_platform.h"
+#include "vulkan.h"

+/*
+ * Loader-ICD version negotiation API
+ */
+#define CURRENT_LOADER_ICD_INTERFACE_VERSION 3
+#define MIN_SUPPORTED_LOADER_ICD_INTERFACE_VERSION 0
+typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderICDInterfaceVersion)(uint32_t *pVersion);
 /*
 * The ICD must reserve space for a pointer for the loader's dispatch
 * table, at the start of <each object>.
 * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
 */

-#define ICD_LOADER_MAGIC   0x01CDC0DE
+#define ICD_LOADER_MAGIC 0x01CDC0DE

-typedef union _VK_LOADER_DATA {
-  uintptr_t loaderMagic;
-  void *loaderData;
+typedef union {
+    uintptr_t loaderMagic;
+    void *loaderData;
 } VK_LOADER_DATA;

-static inline void set_loader_magic_value(void* pNewObject) {
-    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline void set_loader_magic_value(void *pNewObject) {
+    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    loader_info->loaderMagic = ICD_LOADER_MAGIC;
 }

-static inline bool valid_loader_magic_value(void* pNewObject) {
-    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline bool valid_loader_magic_value(void *pNewObject) {
+    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
 }

@@ -30,56 +58,74 @@ static inline bool valid_loader_magic_value(void* pNewObject) {
 * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
 * contains the platform-specific connection and surface information.
 */
-typedef enum _VkIcdWsiPlatform {
+typedef enum {
    VK_ICD_WSI_PLATFORM_MIR,
    VK_ICD_WSI_PLATFORM_WAYLAND,
    VK_ICD_WSI_PLATFORM_WIN32,
    VK_ICD_WSI_PLATFORM_XCB,
    VK_ICD_WSI_PLATFORM_XLIB,
+    VK_ICD_WSI_PLATFORM_DISPLAY
 } VkIcdWsiPlatform;

-typedef struct _VkIcdSurfaceBase {
-    VkIcdWsiPlatform   platform;
+typedef struct {
+    VkIcdWsiPlatform platform;
 } VkIcdSurfaceBase;

 #ifdef VK_USE_PLATFORM_MIR_KHR
-typedef struct _VkIcdSurfaceMir {
-    VkIcdSurfaceBase   base;
-    MirConnection*     connection;
-    MirSurface*        mirSurface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    MirConnection *connection;
+    MirSurface *mirSurface;
 } VkIcdSurfaceMir;
 #endif // VK_USE_PLATFORM_MIR_KHR

 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
-typedef struct _VkIcdSurfaceWayland {
-    VkIcdSurfaceBase   base;
-    struct wl_display* display;
-    struct wl_surface* surface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    struct wl_display *display;
+    struct wl_surface *surface;
 } VkIcdSurfaceWayland;
 #endif // VK_USE_PLATFORM_WAYLAND_KHR

 #ifdef VK_USE_PLATFORM_WIN32_KHR
-typedef struct _VkIcdSurfaceWin32 {
-    VkIcdSurfaceBase   base;
-    HINSTANCE          hinstance;
-    HWND               hwnd;
+typedef struct {
+    VkIcdSurfaceBase base;
+    HINSTANCE hinstance;
+    HWND hwnd;
 } VkIcdSurfaceWin32;
 #endif // VK_USE_PLATFORM_WIN32_KHR

 #ifdef VK_USE_PLATFORM_XCB_KHR
-typedef struct _VkIcdSurfaceXcb {
-    VkIcdSurfaceBase   base;
-    xcb_connection_t*  connection;
-    xcb_window_t       window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    xcb_connection_t *connection;
+    xcb_window_t window;
 } VkIcdSurfaceXcb;
 #endif // VK_USE_PLATFORM_XCB_KHR

 #ifdef VK_USE_PLATFORM_XLIB_KHR
-typedef struct _VkIcdSurfaceXlib {
-    VkIcdSurfaceBase   base;
-    Display*           dpy;
-    Window             window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    Display *dpy;
+    Window window;
 } VkIcdSurfaceXlib;
 #endif // VK_USE_PLATFORM_XLIB_KHR

+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+typedef struct { /* NOTE(review): unlike the other VkIcdSurface* structs in this header, this one has no leading VkIcdSurfaceBase member, and VkIcdWsiPlatform has no Android value -- confirm this is intended */
+    ANativeWindow* window;
+} VkIcdSurfaceAndroid;
+#endif // VK_USE_PLATFORM_ANDROID_KHR
+
+typedef struct { /* presumably the surface record that goes with VK_ICD_WSI_PLATFORM_DISPLAY -- confirm against the loader */
+    VkIcdSurfaceBase base; /* carries the VkIcdWsiPlatform tag; first member, as in the other VkIcdSurface* structs */
+    VkDisplayModeKHR displayMode;
+    uint32_t planeIndex;
+    uint32_t planeStackIndex;
+    VkSurfaceTransformFlagBitsKHR transform;
+    float globalAlpha;
+    VkDisplayPlaneAlphaFlagBitsKHR alphaMode;
+    VkExtent2D imageExtent;
+} VkIcdSurfaceDisplay;
+
 #endif // VKICD_H
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -651,7 +651,7 @@ def generate(env):
    env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
    env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
    env.PkgCheckModules('XF86VIDMODE', ['xxf86vm'])
-    env.PkgCheckModules('DRM', ['libdrm >= 2.4.38'])
+    env.PkgCheckModules('DRM', ['libdrm >= 2.4.66'])

    if env['x11']:
        env.Append(CPPPATH = env['X11_CPPPATH'])
--- a/src/amd/addrlib/addrtypes.h
+++ b/src/amd/addrlib/addrtypes.h
@@ -88,7 +88,11 @@ typedef int            INT;

 #ifndef ADDR_FASTCALL
    #if defined(__GNUC__)
-        #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #if defined(__i386__)
+            #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #else
+            #define ADDR_FASTCALL
+        #endif
    #else
        #define ADDR_FASTCALL __fastcall
    #endif
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1301,6 +1301,9 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
 		src[1] = to_float(ctx, src[1]);
 		result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
 		break;
+	case nir_op_irem:
+		result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
+		break;
 	case nir_op_idiv:
 		result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
 		break;
@@ -1683,7 +1686,7 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,

 		for (c = 0; c < 2; c++) {
 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
-								ctx->i32zero, "");
+								LLVMConstInt(ctx->i32, c, false), "");
 			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
 			half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
 			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
@@ -1782,15 +1785,17 @@ static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
 	unsigned desc_set = nir_intrinsic_desc_set(instr);
 	unsigned binding = nir_intrinsic_binding(instr);
 	LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
-	struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
+	struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
+	struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
 	unsigned base_offset = layout->binding[binding].offset;
 	LLVMValueRef offset, stride;

 	if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
 	    layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+		unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
+			layout->binding[binding].dynamic_offset_offset;
 		desc_ptr = ctx->push_constants;
-		base_offset = ctx->options->layout->push_constant_size;
-		base_offset +=  16 * layout->binding[binding].dynamic_offset_offset;
+		base_offset = pipeline_layout->push_constant_size + 16 * idx;
 		stride = LLVMConstInt(ctx->i32, 16, false);
 	} else
 		stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
@@ -2609,6 +2614,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
 			    ctx->voidt, NULL, 0, 0);
 }

+static void emit_discard_if(struct nir_to_llvm_context *ctx, /* handler for nir_intrinsic_discard_if (dispatched from visit_intrinsic) */
+			    nir_intrinsic_instr *instr)
+{
+	LLVMValueRef cond;
+	ctx->shader_info->fs.can_discard = true; /* record in the shader info that this shader contains a discard */
+
+	cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, /* cond = (instr->src[0] != ctx->i32zero) */
+			     get_src(ctx, instr->src[0]),
+			     ctx->i32zero, "");
+
+	cond = LLVMBuildSelect(ctx->builder, cond, /* turn the boolean into a float: -1.0f when set, ctx->f32zero otherwise */
+			       LLVMConstReal(ctx->f32, -1.0f),
+			       ctx->f32zero, "");
+	emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kill", /* NOTE(review): presumably the intrinsic kills the invocation when its operand is negative -- confirm */
+			    LLVMVoidTypeInContext(ctx->context),
+			    &cond, 1, 0);
+}
+
 static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
@@ -2921,6 +2944,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
 				    LLVMVoidTypeInContext(ctx->context),
 				    NULL, 0, 0);
 		break;
+	case nir_intrinsic_discard_if:
+		emit_discard_if(ctx, instr);
+		break;
 	case nir_intrinsic_memory_barrier:
 		emit_waitcnt(ctx);
 		break;
@@ -3277,18 +3303,31 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		}
 	}

+	if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+		result = get_buffer_size(ctx, res_ptr, true);
+		goto write_result;
+	}
+
 	if (instr->op == nir_texop_texture_samples) {
-		LLVMValueRef res, samples;
+		LLVMValueRef res, samples, is_msaa;
 		res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
 		samples = LLVMBuildExtractElement(ctx->builder, res,
 						  LLVMConstInt(ctx->i32, 3, false), "");
+		is_msaa = LLVMBuildLShr(ctx->builder, samples,
+					LLVMConstInt(ctx->i32, 28, false), "");
+		is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
+				       LLVMConstInt(ctx->i32, 0xe, false), "");
+		is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
+					LLVMConstInt(ctx->i32, 0xe, false), "");
+
 		samples = LLVMBuildLShr(ctx->builder, samples,
 					LLVMConstInt(ctx->i32, 16, false), "");
 		samples = LLVMBuildAnd(ctx->builder, samples,
 				       LLVMConstInt(ctx->i32, 0xf, false), "");
 		samples = LLVMBuildShl(ctx->builder, ctx->i32one,
 				       samples, "");
-
+		samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
+					  ctx->i32one, "");
 		result = samples;
 		goto write_result;
 	}
@@ -3387,7 +3426,10 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		address[count++] = sample_index;
 	} else if(instr->op == nir_texop_txs) {
 		count = 0;
-		address[count++] = lod;
+		if (lod)
+			address[count++] = lod;
+		else
+			address[count++] = ctx->i32zero;
 	}

 	for (chan = 0; chan < count; chan++) {
@@ -3412,7 +3454,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		result = build_tex_intrinsic(ctx, instr, &txf_info);

 		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
-		result = LLVMBuildICmp(ctx->builder, LLVMIntEQ, result, ctx->i32zero, "");
+		result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
 		goto write_result;
 	}

@@ -3430,7 +3472,8 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 	 * The sample index should be adjusted as follows:
 	 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
 	 */
-	if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
+	if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
+	    instr->op != nir_texop_txs) {
 		LLVMValueRef txf_address[4];
 		struct ac_tex_info txf_info = { 0 };
 		unsigned txf_count = count;
@@ -3485,12 +3528,13 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 	if (offsets && instr->op == nir_texop_txf) {
 		nir_const_value *const_offset =
 			nir_src_as_const_value(instr->src[const_src].src);
-
+		int num_offsets = instr->src[const_src].src.ssa->num_components;
 		assert(const_offset);
-		if (instr->coord_components > 2)
+		num_offsets = MIN2(num_offsets, instr->coord_components);
+		if (num_offsets > 2)
 			address[2] = LLVMBuildAdd(ctx->builder,
 						  address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
-		if (instr->coord_components > 1)
+		if (num_offsets > 1)
 			address[1] = LLVMBuildAdd(ctx->builder,
 						  address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
 		address[0] = LLVMBuildAdd(ctx->builder,
@@ -3512,6 +3556,8 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)

 	if (instr->op == nir_texop_query_levels)
 		result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
+	else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
+		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
 	else if (instr->op == nir_texop_txs &&
 		 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
 		 instr->is_array) {
@@ -3520,7 +3566,8 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
-	}
+	} else if (instr->dest.ssa.num_components != 4)
+		result = trim_vector(ctx, result, instr->dest.ssa.num_components);

 write_result:
 	if (result) {
@@ -3910,7 +3957,7 @@ static void
 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
 			  struct nir_variable *variable)
 {
-	int idx = variable->data.location;
+	int idx = variable->data.location + variable->data.index;
 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);

 	variable->data.driver_location = idx * 4;
@@ -3940,7 +3987,7 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
 		                       si_build_alloca_undef(ctx, ctx->f32, "");
 		}
 	}
-	ctx->output_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
+	ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
 }

 static void
@@ -4351,12 +4398,10 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx,

 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
 		LLVMValueRef values[4];
-		bool last;
+
 		if (!(ctx->output_mask & (1ull << i)))
 			continue;

-		last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
-
 		if (i == FRAG_RESULT_DEPTH) {
 			ctx->shader_info->fs.writes_z = true;
 			depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
@@ -4366,10 +4411,14 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx,
 			stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
 							      ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
 		} else {
+			bool last = false;
 			for (unsigned j = 0; j < 4; j++)
 				values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
 									ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));

+			if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
+				last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
+
 			si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
 			index++;
 		}
@@ -4450,6 +4499,13 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 	memset(shader_info, 0, sizeof(*shader_info));

 	LLVMSetTarget(ctx.module, "amdgcn--");
+
+	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
+	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
+	LLVMSetDataLayout(ctx.module, data_layout_str);
+	LLVMDisposeTargetData(data_layout);
+	LLVMDisposeMessage(data_layout_str);
+
 	setup_types(&ctx);

 	ctx.builder = LLVMCreateBuilderInContext(ctx.context);
@@ -4471,7 +4527,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
 				idx++;
 			}

-			shared_size *= 4;
+			shared_size *= 16;
 			var = LLVMAddGlobalInAddressSpace(ctx.module,
 							  LLVMArrayType(ctx.i8, shared_size),
 							  "compute_lds",
--- a/src/amd/vulkan/.gitignore
+++ b/src/amd/vulkan/.gitignore
@@ -4,3 +4,4 @@
 /radv_timestamp.h
 /dev_icd.json
 /vk_format_table.c
+/radeon_icd.*.json
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -32,9 +32,6 @@ lib_LTLIBRARIES = libvulkan_radeon.la
 # The gallium includes are for the util/u_math.h include from main/macros.h

 AM_CPPFLAGS = \
-	$(AMDGPU_CFLAGS) \
-	$(VALGRIND_CFLAGS) \
-	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_builddir)/src \
 	-I$(top_srcdir)/src \
@@ -48,7 +45,10 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gallium/include
+	-I$(top_srcdir)/src/gallium/include \
+	$(AMDGPU_CFLAGS) \
+	$(VALGRIND_CFLAGS) \
+	$(DEFINES)

 AM_CFLAGS = \
 	$(VISIBILITY_CFLAGS) \
@@ -131,11 +131,11 @@ vk_format_table.c: vk_format_table.py \
 	$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h
+CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
 EXTRA_DIST = \
 	$(top_srcdir)/include/vulkan/vk_icd.h \
 	dev_icd.json.in \
-	radeon_icd.json \
+	radeon_icd.json.in \
 	radv_entrypoints_gen.py \
 	vk_format_layout.csv \
 	vk_format_parse.py \
@@ -155,7 +155,7 @@ libvulkan_radeon_la_LDFLAGS = \


 icdconfdir = @VULKAN_ICD_INSTALL_DIR@
-icdconf_DATA = radeon_icd.json
+icdconf_DATA = radeon_icd.@host_cpu@.json
 # The following is used for development purposes, by setting VK_ICD_FILENAMES.
 noinst_DATA = dev_icd.json

@@ -164,4 +164,9 @@ dev_icd.json : dev_icd.json.in
 		-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
 		< $(srcdir)/dev_icd.json.in > $@

+radeon_icd.@host_cpu@.json : radeon_icd.json.in
+	$(AM_V_GEN) $(SED) \
+		-e "s#@install_libdir@#${libdir}#" \
+		< $(srcdir)/radeon_icd.json.in > $@
+
 include $(top_srcdir)/install-lib-links.mk
--- a/src/amd/vulkan/dev_icd.json.in
+++ b/src/amd/vulkan/dev_icd.json.in
@@ -2,6 +2,6 @@
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "@build_libdir@/libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
+        "api_version": "1.0.3"
    }
 }
--- a/src/amd/vulkan/radeon_icd.json
+++ b/src/amd/vulkan/radeon_icd.json
@@ -1,7 +0,0 @@
-{
-    "file_format_version": "1.0.0",
-    "ICD": {
-        "library_path": "libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
-    }
-}
--- a/src/amd/vulkan/radeon_icd.json.in
+++ b/src/amd/vulkan/radeon_icd.json.in
@@ -0,0 +1,7 @@
+{
+    "file_format_version": "1.0.0",
+    "ICD": {
+        "library_path": "@install_libdir@/libvulkan_radeon.so",
+        "api_version": "1.0.3"
+    }
+}
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -345,7 +345,8 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
 			       raster->spi_interp_control);

 	radeon_set_context_reg_seq(cmd_buffer->cs, R_028A00_PA_SU_POINT_SIZE, 2);
-	radeon_emit(cmd_buffer->cs, 0);
+	unsigned tmp = (unsigned)(1.0 * 8.0);
+	radeon_emit(cmd_buffer->cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
 	radeon_emit(cmd_buffer->cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
 		    S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2))); /* R_028A04_PA_SU_POINT_MINMAX */

@@ -1393,7 +1394,7 @@ void radv_CmdBindDescriptorSets(
 		radv_bind_descriptor_set(cmd_buffer, set, idx);

 		for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
-			unsigned idx = j + layout->set[i].dynamic_offset_start;
+			unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
 			uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4;
 			assert(dyn_idx < dynamicOffsetCount);

@@ -1652,6 +1653,9 @@ void radv_CmdExecuteCommands(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);

+	/* Emit pending flushes on primary prior to executing secondary */
+	si_emit_cache_flush(primary);
+
 	for (uint32_t i = 0; i < commandBufferCount; i++) {
 		RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);

@@ -1661,6 +1665,7 @@ void radv_CmdExecuteCommands(
 	/* if we execute secondary we need to re-emit out pipelines */
 	if (commandBufferCount) {
 		primary->state.emitted_pipeline = NULL;
+		primary->state.emitted_compute_pipeline = NULL;
 		primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
 		primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL;
 	}
@@ -2163,9 +2168,6 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffe
 			radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
 	} else if (radv_layout_has_cmask(image, src_layout) &&
 		   !radv_layout_has_cmask(image, dst_layout)) {
-
-		if (!cmd_buffer->device->allow_fast_clears)
-			return;
 		radv_fast_clear_flush_image_inplace(cmd_buffer, image);
 	}
 }
@@ -2286,14 +2288,18 @@ void radv_CmdPipelineBarrier(
 		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
 		case VK_ACCESS_INDEX_READ_BIT:
 		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-		case VK_ACCESS_UNIFORM_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
 			break;
+		case VK_ACCESS_UNIFORM_READ_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+			break;
 		case VK_ACCESS_SHADER_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
 			break;
 		case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
 		case VK_ACCESS_TRANSFER_READ_BIT:
+		case VK_ACCESS_TRANSFER_WRITE_BIT:
+		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
 			flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
 		default:
 			break;
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -275,12 +275,13 @@ radv_descriptor_set_create(struct radv_device *device,
 		uint32_t layout_size = align_u32(layout->size, 32);
 		set->size = layout->size;
 		if (!cmd_buffer) {
-			if (pool->current_offset + layout_size <= pool->size) {
+			if (pool->current_offset + layout_size <= pool->size &&
+			    pool->allocated_sets < pool->max_sets) {
 				set->bo = pool->bo;
 				set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
 				set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
 				pool->current_offset += layout_size;
-
+				++pool->allocated_sets;
 			} else {
 				int entry = pool->free_list, prev_entry = -1;
 				uint32_t offset;
@@ -417,6 +418,7 @@ VkResult radv_CreateDescriptorPool(
 	pool->full_list = 0;
 	pool->free_nodes[max_sets - 1].next = -1;
 	pool->max_sets = max_sets;
+	pool->allocated_sets = 0;

 	for (int i = 0; i  + 1 < max_sets; ++i)
 		pool->free_nodes[i].next = i + 1;
@@ -494,6 +496,7 @@ VkResult radv_ResetDescriptorPool(
 		radv_descriptor_set_destroy(device, pool, set, false);
 	}

+	pool->allocated_sets = 0;
 	pool->current_offset = 0;
 	pool->free_list = -1;
 	pool->full_list = 0;
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -44,12 +44,6 @@
 #include "util/debug.h"
 struct radv_dispatch_table dtable;

-struct radv_fence {
-	struct radeon_winsys_fence *fence;
-	bool submitted;
-	bool signalled;
-};
-
 static VkResult
 radv_physical_device_init(struct radv_physical_device *device,
 			  struct radv_instance *instance,
@@ -97,6 +91,7 @@ radv_physical_device_init(struct radv_physical_device *device,

 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
 	device->name = device->rad_info.name;
+	close(fd);
 	return VK_SUCCESS;

 fail:
@@ -119,13 +114,19 @@ static const VkExtensionProperties global_extensions[] = {
 #ifdef VK_USE_PLATFORM_XCB_KHR
 	{
 		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
-		.specVersion = 5,
+		.specVersion = 6,
+	},
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+	{
+		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+		.specVersion = 6,
 	},
 #endif
 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
 	{
 		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
-		.specVersion = 4,
+		.specVersion = 5,
 	},
 #endif
 };
@@ -133,7 +134,7 @@ static const VkExtensionProperties global_extensions[] = {
 static const VkExtensionProperties device_extensions[] = {
 	{
 		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
-		.specVersion = 67,
+		.specVersion = 68,
 	},
 };

@@ -424,7 +425,7 @@ void radv_GetPhysicalDeviceProperties(
 		.maxGeometryTotalOutputComponents         = 1024,
 		.maxFragmentInputComponents               = 128,
 		.maxFragmentOutputAttachments             = 8,
-		.maxFragmentDualSrcAttachments            = 2,
+		.maxFragmentDualSrcAttachments            = 1,
 		.maxFragmentCombinedOutputResources       = 8,
 		.maxComputeSharedMemorySize               = 32768,
 		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
@@ -659,17 +660,15 @@ VkResult radv_EnumerateInstanceExtensionProperties(
 	uint32_t*                                   pPropertyCount,
 	VkExtensionProperties*                      pProperties)
 {
-	unsigned i;
 	if (pProperties == NULL) {
 		*pPropertyCount = ARRAY_SIZE(global_extensions);
 		return VK_SUCCESS;
 	}

-	for (i = 0; i < *pPropertyCount; i++)
-		memcpy(&pProperties[i], &global_extensions[i], sizeof(VkExtensionProperties));
+	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
+	typed_memcpy(pProperties, global_extensions, *pPropertyCount);

-	*pPropertyCount = i;
-	if (i < ARRAY_SIZE(global_extensions))
+	if (*pPropertyCount < ARRAY_SIZE(global_extensions))
 		return VK_INCOMPLETE;

 	return VK_SUCCESS;
@@ -681,19 +680,17 @@ VkResult radv_EnumerateDeviceExtensionProperties(
 	uint32_t*                                   pPropertyCount,
 	VkExtensionProperties*                      pProperties)
 {
-	unsigned i;
-
 	if (pProperties == NULL) {
 		*pPropertyCount = ARRAY_SIZE(device_extensions);
 		return VK_SUCCESS;
 	}

-	for (i = 0; i < *pPropertyCount; i++)
-		memcpy(&pProperties[i], &device_extensions[i], sizeof(VkExtensionProperties));
+	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
+	typed_memcpy(pProperties, device_extensions, *pPropertyCount);

-	*pPropertyCount = i;
-	if (i < ARRAY_SIZE(device_extensions))
+	if (*pPropertyCount < ARRAY_SIZE(device_extensions))
 		return VK_INCOMPLETE;
+
 	return VK_SUCCESS;
 }

@@ -869,7 +866,7 @@ VkResult radv_AllocateMemory(
 		flags |= RADEON_FLAG_NO_CPU_ACCESS;
 	else
 		flags |= RADEON_FLAG_CPU_ACCESS;
-	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
+	mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
 					       domain, flags);

 	if (!mem->bo) {
@@ -1172,6 +1169,8 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_fence, fence, _fence);

+	if (fence->signalled)
+		return VK_SUCCESS;
 	if (!fence->submitted)
 		return VK_NOT_READY;

@@ -1734,31 +1733,55 @@ radv_tex_bordercolor(VkBorderColor bcolor)
 	return 0;
 }

+static unsigned
+radv_tex_aniso_filter(unsigned filter) /* buckets a max-anisotropy value into a step index 0..4 */
+{
+	if (filter < 2)
+		return 0;	/* 0..1 */
+	if (filter < 4)
+		return 1;	/* 2..3 */
+	if (filter < 8)
+		return 2;	/* 4..7 */
+	if (filter < 16)
+		return 3;	/* 8..15 */
+	return 4;	/* 16 and above */
+}
+
 static void
 radv_init_sampler(struct radv_device *device,
 		  struct radv_sampler *sampler,
 		  const VkSamplerCreateInfo *pCreateInfo)
 {
-	uint32_t max_aniso = 0;
-	uint32_t max_aniso_ratio = 0;//TODO
+	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
+					(uint32_t) pCreateInfo->maxAnisotropy : 0;
+	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
 	bool is_vi;
 	is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI);

+	if (!is_vi && max_aniso > 0) {
+		radv_finishme("Anisotropic filtering must be disabled manually "
+		              "by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n");
+		max_aniso = max_aniso_ratio = 0;
+	}
+
 	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
 			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
 			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
 			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
 			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
 			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
+			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
+			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
 			     S_008F30_DISABLE_CUBE_WRAP(0) |
 			     S_008F30_COMPAT_MODE(is_vi));
 	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
-			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)));
+			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
+			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
 	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
 			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
 			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
 			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
-			     S_008F38_MIP_POINT_PRECLAMP(1) |
+			     S_008F38_MIP_POINT_PRECLAMP(0) |
 			     S_008F38_DISABLE_LSB_CEIL(1) |
 			     S_008F38_FILTER_PREC_FIX(1) |
 			     S_008F38_ANISO_OVERRIDE(is_vi));
@@ -1800,3 +1823,48 @@ void radv_DestroySampler(
 		return;
 	vk_free2(&device->alloc, pAllocator, sampler);
 }
+
+
+/* vk_icd.h does not declare this function, so we declare it here to
+ * suppress -Wmissing-prototypes.
+ */
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
+/* Clamps *pSupportedVersion in place to at most 3 and always returns VK_SUCCESS. */
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
+{
+	/* For the full details on loader interface versioning, see
+	* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+	* What follows is a condensed summary, to help you navigate the large and
+	* confusing official doc.
+	*
+	*   - Loader interface v0 is incompatible with later versions. We don't
+	*     support it.
+	*
+	*   - In loader interface v1:
+	*       - The first ICD entrypoint called by the loader is
+	*         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+	*         entrypoint.
+	*       - The ICD must statically expose no other Vulkan symbol unless it is
+	*         linked with -Bsymbolic.
+	*       - Each dispatchable Vulkan handle created by the ICD must be
+	*         a pointer to a struct whose first member is VK_LOADER_DATA. The
+	*         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+	*       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+	*         vkDestroySurfaceKHR(). The ICD must be capable of working with
+	*         such loader-managed surfaces.
+	*
+	*    - Loader interface v2 differs from v1 in:
+	*       - The first ICD entrypoint called by the loader is
+	*         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+	*         statically expose this entrypoint.
+	*
+	*    - Loader interface v3 differs from v2 in:
+	*        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+	*          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
+	*          because the loader no longer does so.
+	*/
+	*pSupportedVersion = MIN2(*pSupportedVersion, 3u); /* lower of the caller's value and 3 */
+	return VK_SUCCESS;
+}
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -154,6 +154,7 @@ uint32_t radv_translate_tex_dataformat(VkFormat format,
 		case VK_FORMAT_D16_UNORM:
 			return V_008F14_IMG_DATA_FORMAT_16;
 		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_X8_D24_UNORM_PACK32:
 			return V_008F14_IMG_DATA_FORMAT_8_24;
 		case VK_FORMAT_S8_UINT:
 			return V_008F14_IMG_DATA_FORMAT_8;
@@ -729,9 +730,6 @@ uint32_t radv_translate_dbformat(VkFormat format)
 	case VK_FORMAT_D16_UNORM:
 	case VK_FORMAT_D16_UNORM_S8_UINT:
 		return V_028040_Z_16;
-	case VK_FORMAT_X8_D24_UNORM_PACK32:
-	case VK_FORMAT_D24_UNORM_S8_UINT:
-		return V_028040_Z_24; /* deprecated on SI */
 	case VK_FORMAT_D32_SFLOAT:
 	case VK_FORMAT_D32_SFLOAT_S8_UINT:
 		return V_028040_Z_32_FLOAT;
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -267,17 +267,7 @@ si_make_texture_descriptor(struct radv_device *device,

 	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
-		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
-
-		switch (vk_format) {
-		case VK_FORMAT_X8_D24_UNORM_PACK32:
-		case VK_FORMAT_D24_UNORM_S8_UINT:
-		case VK_FORMAT_D32_SFLOAT_S8_UINT:
-			vk_format_compose_swizzles(mapping, swizzle_yyyy, swizzle);
-			break;
-		default:
-			vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
-		}
+		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
 	} else {
 		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
 	}
@@ -520,6 +510,7 @@ radv_image_alloc_fmask(struct radv_device *device,

 	image->fmask.offset = align64(image->size, image->fmask.alignment);
 	image->size = image->fmask.offset + image->fmask.size;
+	image->alignment = MAX2(image->alignment, image->fmask.alignment);
 }

 static void
@@ -585,6 +576,7 @@ radv_image_alloc_cmask(struct radv_device *device,
 	/* + 8 for storing the clear values */
 	image->clear_value_offset = image->cmask.offset + image->cmask.size;
 	image->size = image->cmask.offset + image->cmask.size + 8;
+	image->alignment = MAX2(image->alignment, image->cmask.alignment);
 }

 static void
@@ -595,6 +587,7 @@ radv_image_alloc_dcc(struct radv_device *device,
 	/* + 8 for storing the clear values */
 	image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
 	image->size = image->dcc_offset + image->surface.dcc_size + 8;
+	image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
 }

 static unsigned
@@ -666,6 +659,9 @@ radv_image_alloc_htile(struct radv_device *device,
 	if (env_var_as_boolean("RADV_HIZ_DISABLE", false))
 		return;

+	if (image->array_size > 1 || image->levels > 1)
+		return;
+
 	image->htile.size = radv_image_get_htile_size(device, image);

 	if (!image->htile.size)
@@ -775,8 +771,13 @@ radv_image_view_init(struct radv_image_view *iview,
 	iview->vk_format = pCreateInfo->format;
 	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

-	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
 		is_stencil = true;
+		iview->vk_format = vk_format_stencil_only(iview->vk_format);
+	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+		iview->vk_format = vk_format_depth_only(iview->vk_format);
+	}
+
 	iview->extent = (VkExtent3D) {
 		.width  = radv_minify(image->extent.width , range->baseMipLevel),
 		.height = radv_minify(image->extent.height, range->baseMipLevel),
@@ -794,7 +795,7 @@ radv_image_view_init(struct radv_image_view *iview,

 	si_make_texture_descriptor(device, image, false,
 				   iview->type,
-				   pCreateInfo->format,
+				   iview->vk_format,
 				   &pCreateInfo->components,
 				   0, radv_get_levelCount(image, range) - 1,
 				   range->baseArrayLayer,
@@ -836,29 +837,29 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
 		switch (micro_tile_mode) {
 		case 0: /* displayable */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                            image->surface.tiling_index[0] = 10;
                            break;
-			case 16:
+			case 2:
                            image->surface.tiling_index[0] = 11;
                            break;
-			default: /* 32, 64 */
+			default: /* 4, 8 */
                            image->surface.tiling_index[0] = 12;
                            break;
 			}
 			break;
 		case 1: /* thin */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                                image->surface.tiling_index[0] = 14;
                                break;
-			case 16:
+			case 2:
                                image->surface.tiling_index[0] = 15;
                                break;
-			case 32:
+			case 4:
                                image->surface.tiling_index[0] = 16;
                                break;
-			default: /* 64, 128 */
+			default: /* 8, 16 */
                                image->surface.tiling_index[0] = 17;
                                break;
 			}
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -26,6 +26,7 @@

 #include "radv_meta.h"
 #include "nir/nir_builder.h"
+#include "vk_format.h"

 enum blit2d_dst_type {
 	/* We can bind this destination as a "normal" render target and render
@@ -284,8 +285,10 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,

 	for (unsigned r = 0; r < num_rects; ++r) {
 		VkFormat depth_format = 0;
-		if (dst->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
-			depth_format = dst->image->vk_format;
+		if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+			depth_format = vk_format_stencil_only(dst->image->vk_format);
+		else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
+			depth_format = vk_format_depth_only(dst->image->vk_format);
 		struct blit2d_src_temps src_temps;
 		blit2d_bind_src(cmd_buffer, src_img, src_buf, &rects[r], &src_temps, src_type, depth_format);

--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -523,6 +523,8 @@ void radv_CmdUpdateBuffer(
 	assert(!(va & 3));

 	if (dataSize < 4096) {
+		si_emit_cache_flush(cmd_buffer);
+
 		cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, dst_buffer->bo, 8);

 		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -1,6 +1,33 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 #include "radv_meta.h"
 #include "nir/nir_builder.h"

+/*
+ * Compute shader implementation of image->buffer copy.
+ */
+
 static nir_shader *
 build_nir_itob_compute_shader(struct radv_device *dev)
 {
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -998,7 +998,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		const VkImageSubresourceRange *range = &ranges[r];
 		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
 			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
-				radv_minify(image->extent.depth, l) :
+				radv_minify(image->extent.depth, range->baseMipLevel + l) :
 				radv_get_layerCount(image, range);
 			for (uint32_t s = 0; s < layer_count; ++s) {
 				struct radv_image_view iview;
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader)
                NIR_PASS(progress, shader, nir_opt_algebraic);
                NIR_PASS(progress, shader, nir_opt_constant_folding);
                NIR_PASS(progress, shader, nir_opt_undef);
+                NIR_PASS(progress, shader, nir_opt_conditional_discard);
        } while (progress);
 }

@@ -642,7 +643,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 					const VkGraphicsPipelineCreateInfo *pCreateInfo,
 					uint32_t blend_enable,
 					uint32_t blend_need_alpha,
-					bool single_cb_enable)
+					bool single_cb_enable,
+					bool blend_mrt0_is_dual_src)
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
@@ -664,6 +666,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,

 	blend->cb_shader_mask = si_get_cb_shader_mask(col_format);

+	if (blend_mrt0_is_dual_src)
+		col_format |= (col_format & 0xf) << 4;
 	if (!col_format)
 		col_format |= V_028714_SPI_SHADER_32_R;
 	blend->spi_shader_col_format = col_format;
@@ -715,8 +719,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	unsigned mode = V_028808_CB_NORMAL;
 	uint32_t blend_enable = 0, blend_need_alpha = 0;
+	bool blend_mrt0_is_dual_src = false;
 	int i;
 	bool single_cb_enable = false;
+
+	if (!vkblend)
+		return;
+
 	if (extra && extra->custom_blend_mode) {
 		single_cb_enable = true;
 		mode = extra->custom_blend_mode;
@@ -755,7 +764,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		}

 		if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
-			radv_finishme("dual source blending");
+			if (i == 0)
+				blend_mrt0_is_dual_src = true;
+
 		if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
 			srcRGB = VK_BLEND_FACTOR_ONE;
 			dstRGB = VK_BLEND_FACTOR_ONE;
@@ -797,7 +808,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);

 	radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo,
-						blend_enable, blend_need_alpha, single_cb_enable);
+						blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src);
 }

 static uint32_t si_translate_stencil_op(enum VkStencilOp op)
@@ -1069,18 +1080,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,

 	struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;

-	dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
-	if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
-		typed_memcpy(dynamic->viewport.viewports,
-			     pCreateInfo->pViewportState->pViewports,
-			     pCreateInfo->pViewportState->viewportCount);
-	}
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pViewportState is [...] NULL if the pipeline
+	 *    has rasterization disabled.
+	 */
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
+		assert(pCreateInfo->pViewportState);

-	dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
-	if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
-		typed_memcpy(dynamic->scissor.scissors,
-			     pCreateInfo->pViewportState->pScissors,
-			     pCreateInfo->pViewportState->scissorCount);
+		dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+		if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+			typed_memcpy(dynamic->viewport.viewports,
+				     pCreateInfo->pViewportState->pViewports,
+				     pCreateInfo->pViewportState->viewportCount);
+		}
+
+		dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+		if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+			typed_memcpy(dynamic->scissor.scissors,
+				     pCreateInfo->pViewportState->pScissors,
+				     pCreateInfo->pViewportState->scissorCount);
+		}
 	}

 	if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
@@ -1098,7 +1118,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 			pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
 	}

-	if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pColorBlendState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is
+	 *    created against does not use any color attachments.
+	 */
+	bool uses_color_att = false;
+	for (unsigned i = 0; i < subpass->color_count; ++i) {
+		if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
+			uses_color_att = true;
+			break;
+		}
+	}
+
+	if (uses_color_att && states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
 		assert(pCreateInfo->pColorBlendState);
 		typed_memcpy(dynamic->blend_constants,
 			     pCreateInfo->pColorBlendState->blendConstants, 4);
@@ -1110,14 +1144,17 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	 * no need to override the depthstencil defaults in
 	 * radv_pipeline::dynamic_state when there is no depthstencil attachment.
 	 *
-	 * From the Vulkan spec (20 Oct 2015, git-aa308cb):
+	 * Section 9.2 of the Vulkan 1.0.15 spec says:
 	 *
-	 *    pDepthStencilState [...] may only be NULL if renderPass and subpass
-	 *    specify a subpass that has no depth/stencil attachment.
+	 *    pDepthStencilState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is created
+	 *    against does not use a depth/stencil attachment.
 	 */
-	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+		assert(pCreateInfo->pDepthStencilState);
+
 		if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->depth_bounds.min =
 				pCreateInfo->pDepthStencilState->minDepthBounds;
 			dynamic->depth_bounds.max =
@@ -1125,7 +1162,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_compare_mask.front =
 				pCreateInfo->pDepthStencilState->front.compareMask;
 			dynamic->stencil_compare_mask.back =
@@ -1133,7 +1169,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_write_mask.front =
 				pCreateInfo->pDepthStencilState->front.writeMask;
 			dynamic->stencil_write_mask.back =
@@ -1141,7 +1176,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_reference.front =
 				pCreateInfo->pDepthStencilState->front.reference;
 			dynamic->stencil_reference.back =
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -498,6 +498,7 @@ struct radv_descriptor_pool {
 	int free_list;
 	int full_list;
 	uint32_t max_sets;
+	uint32_t allocated_sets;
 	struct radv_descriptor_pool_free_node free_nodes[];
 };

@@ -1206,6 +1207,13 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
 			   struct radv_image *image, uint32_t value);
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 			 struct radv_image *image, uint32_t value);
+
+struct radv_fence {
+	struct radeon_winsys_fence *fence;
+	bool submitted;
+	bool signalled;
+};
+
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
 								\
 	static inline struct __radv_type *			\
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -131,6 +131,7 @@ VkResult radv_GetQueryPoolResults(
 	VkDeviceSize                                stride,
 	VkQueryResultFlags                          flags)
 {
+	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
 	char *data = pData;
 	VkResult result = VK_SUCCESS;
@@ -141,23 +142,20 @@ VkResult radv_GetQueryPoolResults(
 		char *src = pool->ptr + query * pool->stride;
 		uint32_t available;

-		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
-			while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
-				;
-		}
-
-		if (!*(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query) &&
-		    !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
-			if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
-				*(uint32_t*)dest = 0;
-			result = VK_NOT_READY;
-			continue;
-
-		}
-
-		available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
 		switch (pool->type) {
-		case VK_QUERY_TYPE_TIMESTAMP:
+		case VK_QUERY_TYPE_TIMESTAMP: {
+			if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+				while(!*(volatile uint32_t*)(pool->ptr + pool->availability_offset + 4 * query))
+					;
+			}
+
+			available = *(uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
+			if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
+				result = VK_NOT_READY;
+				break;
+
+			}
+
 			if (flags & VK_QUERY_RESULT_64_BIT) {
 				*(uint64_t*)dest = *(uint64_t*)src;
 				dest += 8;
@@ -166,8 +164,32 @@ VkResult radv_GetQueryPoolResults(
 				dest += 4;
 			}
 			break;
+		}
 		case VK_QUERY_TYPE_OCCLUSION: {
-			uint64_t result = *(uint64_t*)(src + pool->stride - 16);
+			/* Sum into sample_count: a block-scope "result" declared up here
+			 * would shadow the VkResult and swallow VK_NOT_READY below. */
+			volatile uint64_t const *src64 = (volatile uint64_t const *)src;
+			uint64_t sample_count = 0;
+			int db_count = get_max_db(device);
+			available = 1;
+
+			for (int i = 0; i < db_count; ++i) {
+				uint64_t start, end;
+				do { /* re-read the pair while WAIT_BIT is set and bit 63 is missing */
+					start = src64[2 * i];
+					end = src64[2 * i + 1];
+				} while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
+				if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
+					available = 0; /* bit 63 clear on either value: pair not counted */
+				else
+					sample_count += end - start;
+			}
+
+			if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
+				result = VK_NOT_READY; /* still the function-level VkResult here */
+				break;
+			}
+			uint64_t result = sample_count; /* name read by the unmodified stores below */

 			if (flags & VK_QUERY_RESULT_64_BIT) {
 				*(uint64_t*)dest = result;
@@ -183,8 +205,11 @@ VkResult radv_GetQueryPoolResults(
 		}

 		if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
-			*(uint32_t*)dest = available;
-			dest += 4;
+			if (flags & VK_QUERY_RESULT_64_BIT) {
+				*(uint64_t*)dest = available;
+			} else {
+				*(uint32_t*)dest = available;
+			}
 		}
 	}

@@ -357,11 +382,14 @@ void radv_CmdEndQuery(
 		radeon_emit(cs, va + 8);
 		radeon_emit(cs, (va + 8) >> 32);

-		radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
-		radeon_emit(cs, va + pool->stride - 16);
-		radeon_emit(cs, (va + pool->stride - 16) >> 32);
+		/* hangs for VK_COMMAND_BUFFER_LEVEL_SECONDARY. */
+		if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+			radeon_emit(cs, PKT3(PKT3_OCCLUSION_QUERY, 3, 0));
+			radeon_emit(cs, va);
+			radeon_emit(cs, va >> 32);
+			radeon_emit(cs, va + pool->stride - 16);
+			radeon_emit(cs, (va + pool->stride - 16) >> 32);
+		}

 		break;
 	default:
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -75,7 +75,7 @@ void radv_DestroySurfaceKHR(
 	const VkAllocationCallbacks*                 pAllocator)
 {
 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);

 	vk_free2(&instance->alloc, pAllocator, surface);
 }
@@ -87,7 +87,7 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
 	VkBool32*                                   pSupported)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_support(surface, &device->wsi_device,
@@ -101,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
 	VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_capabilities(surface, pSurfaceCapabilities);
@@ -114,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
 	VkSurfaceFormatKHR*                         pSurfaceFormats)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount,
@@ -128,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
 	VkPresentModeKHR*                           pPresentModes)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_present_modes(surface, pPresentModeCount,
@@ -249,7 +249,7 @@ VkResult radv_CreateSwapchainKHR(
 	VkSwapchainKHR*                              pSwapchain)
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
 	struct wsi_interface *iface =
 		device->instance->physicalDevice.wsi_device.wsi[surface->platform];
 	struct wsi_swapchain *swapchain;
@@ -288,6 +288,9 @@ void radv_DestroySwapchainKHR(
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
 	const VkAllocationCallbacks *alloc;

+	if (!_swapchain)
+		return;
+
 	if (pAllocator)
 		alloc = pAllocator;
 	else
@@ -318,13 +321,21 @@ VkResult radv_AcquireNextImageKHR(
 	VkSwapchainKHR                               _swapchain,
 	uint64_t                                     timeout,
 	VkSemaphore                                  semaphore,
-	VkFence                                      fence,
+	VkFence                                      _fence,
 	uint32_t*                                    pImageIndex)
 {
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+	RADV_FROM_HANDLE(radv_fence, fence, _fence);

-	return swapchain->acquire_next_image(swapchain, timeout, semaphore,
-					     pImageIndex);
+	VkResult result = swapchain->acquire_next_image(swapchain, timeout, semaphore,
+	                                                pImageIndex);
+
+	if (fence && result == VK_SUCCESS) {
+		fence->submitted = true;
+		fence->signalled = true;
+	}
+
+	return result;
 }

 VkResult radv_QueuePresentKHR(
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -371,6 +371,15 @@ void si_init_config(struct radv_physical_device *physical_device,
 	radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);

 	if (physical_device->rad_info.chip_class >= CIK) {
+		/* If this is 0, Bonaire can hang even if GS isn't being used.
+		 * Other chips are unaffected. These are suboptimal values,
+		 * but we don't use on-chip GS.
+		 */
+		radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+				       S_028A44_ES_VERTS_PER_SUBGRP(64) |
+				       S_028A44_GS_PRIMS_PER_SUBGRP(4));
+
+		radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
 		radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
 		radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
 		radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
@@ -383,7 +392,6 @@ void si_init_config(struct radv_physical_device *physical_device,
 			 *
 			 * LATE_ALLOC_VS = 2 is the highest safe number.
 			 */
-			radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
 			radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
 			radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
 		} else {
@@ -392,7 +400,6 @@ void si_init_config(struct radv_physical_device *physical_device,
 			 * - VS can't execute on CU0.
 			 * - If HS writes outputs to LDS, LS can't execute on CU0.
 			 */
-			radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
 			radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
 			radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
 		}
@@ -846,6 +853,7 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 	uint64_t main_src_va, main_dest_va;
 	uint64_t skipped_size = 0, realign_size = 0;

+	si_emit_cache_flush(cmd_buffer);

 	if (cmd_buffer->device->instance->physicalDevice.rad_info.family <= CHIP_CARRIZO ||
 	    cmd_buffer->device->instance->physicalDevice.rad_info.family == CHIP_STONEY) {
@@ -909,6 +917,8 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,

 	assert(va % 4 == 0 && size % 4 == 0);

+	si_emit_cache_flush(cmd_buffer);
+
 	while (size) {
 		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
 		unsigned dma_flags = 0;
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -274,6 +274,19 @@ static void radv_set_micro_tile_mode(struct radeon_surf *surf,
 		surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
 }

+static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
+{
+	unsigned index, tileb;
+	/* Derive the index from tileb: 8 * 8 * bpe, capped at the surface's tile_split. */
+	tileb = 8 * 8 * surf->bpe;
+	tileb = MIN2(surf->tile_split, tileb);
+	/* index = number of halvings needed to bring tileb down to 64 or less. */
+	for (index = 0; tileb > 64; index++)
+		tileb >>= 1;
+	/* NOTE(review): 16 is presumably the macro tile mode table size - confirm. */
+	assert(index < 16);
+	return index;
+}

 static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
 					   struct radeon_surf *surf)
@@ -435,6 +448,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
 				AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
 			else
 				AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
+			AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
 		}
 	}

--- a/src/compiler/Makefile.glsl.am
+++ b/src/compiler/Makefile.glsl.am
@@ -62,10 +62,14 @@ glsl_tests_blob_test_LDADD =				\

 glsl_tests_cache_test_SOURCES =				\
 	glsl/tests/cache_test.c
+glsl_tests_cache_test_CFLAGS =				\
+	$(PTHREAD_CFLAGS)
 glsl_tests_cache_test_LDADD =				\
-	glsl/libglsl.la
+	glsl/libglsl.la					\
+	$(PTHREAD_LIBS)

 glsl_tests_general_ir_test_SOURCES =			\
+	glsl/tests/array_refcount_test.cpp 		\
 	glsl/tests/builtin_variable_test.cpp		\
 	glsl/tests/invalidate_locations_test.cpp	\
 	glsl/tests/general_ir_test.cpp			\
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -28,6 +28,8 @@ LIBGLSL_FILES = \
 	glsl/glsl_to_nir.cpp \
 	glsl/glsl_to_nir.h \
 	glsl/hir_field_selection.cpp \
+	glsl/ir_array_refcount.cpp \
+	glsl/ir_array_refcount.h \
 	glsl/ir_basic_block.cpp \
 	glsl/ir_basic_block.h \
 	glsl/ir_builder.cpp \
@@ -227,6 +229,7 @@ NIR_FILES = \
 	nir/nir_metadata.c \
 	nir/nir_move_vec_src_uses_to_dest.c \
 	nir/nir_normalize_cubemap_coords.c \
+	nir/nir_opt_conditional_discard.c \
 	nir/nir_opt_constant_folding.c \
 	nir/nir_opt_copy_propagate.c \
 	nir/nir_opt_cse.c \
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4330,6 +4330,8 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
   if (var->data.patch)
      return;

+   var->data.tess_varying_implicit_sized_array = var->type->is_unsized_array();
+
   validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
                                          &state->tcs_output_size,
                                          "tessellation control shader output");
@@ -4366,6 +4368,7 @@ handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
   if (var->type->is_unsized_array()) {
      var->type = glsl_type::get_array_instance(var->type->fields.array,
            state->Const.MaxPatchVertices);
+      var->data.tess_varying_implicit_sized_array = true;
   } else if (var->type->length != state->Const.MaxPatchVertices) {
      _mesa_glsl_error(&loc, state,
                       "per-vertex tessellation shader input arrays must be "
@@ -5156,11 +5159,13 @@ ast_declarator_list::hir(exec_list *instructions,
          *     sized by an earlier input primitive layout qualifier, when
          *     present, as per the following table."
          */
+         const enum ir_variable_mode mode = (const enum ir_variable_mode)
+            (earlier == NULL ? var->data.mode : earlier->data.mode);
         const bool implicitly_sized =
-            (var->data.mode == ir_var_shader_in &&
+            (mode == ir_var_shader_in &&
             state->stage >= MESA_SHADER_TESS_CTRL &&
             state->stage <= MESA_SHADER_GEOMETRY) ||
-            (var->data.mode == ir_var_shader_out &&
+            (mode == ir_var_shader_out &&
             state->stage == MESA_SHADER_TESS_CTRL);

         if (t->is_unsized_array() && !implicitly_sized)
@@ -7795,10 +7800,9 @@ ast_interface_block::hir(exec_list *instructions,
         }

         if (var->type->is_unsized_array()) {
-            if (var->is_in_shader_storage_block()) {
-               if (is_unsized_array_last_element(var)) {
-                  var->data.from_ssbo_unsized_array = true;
-               }
+            if (var->is_in_shader_storage_block() &&
+                is_unsized_array_last_element(var)) {
+               var->data.from_ssbo_unsized_array = true;
            } else {
               /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays":
                *
@@ -7806,6 +7810,10 @@ ast_interface_block::hir(exec_list *instructions,
                * block and the size is not specified at compile-time, it is
                * sized at run-time. In all other cases, arrays are sized only
                * at compile-time."
+                *
+                * In desktop GLSL it is allowed to have unsized-arrays that are
+                * not last, as long as we can determine that they are implicitly
+                * sized.
                */
               if (state->es_shader) {
                  _mesa_glsl_error(&loc, state, "unsized array `%s' "
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -537,6 +537,12 @@ compute_shader(const _mesa_glsl_parse_state *state)
   return state->stage == MESA_SHADER_COMPUTE;
 }

+static bool
+compute_shader_supported(const _mesa_glsl_parse_state *state)
+{
+   return state->has_compute_shader();
+}
+
 static bool
 buffer_atomics_supported(const _mesa_glsl_parse_state *state)
 {
@@ -1098,15 +1104,15 @@ builtin_builder::create_intrinsics()
                                          ir_intrinsic_group_memory_barrier),
                NULL);
   add_function("__intrinsic_memory_barrier_atomic_counter",
-                _memory_barrier_intrinsic(compute_shader,
+                _memory_barrier_intrinsic(compute_shader_supported,
                                          ir_intrinsic_memory_barrier_atomic_counter),
                NULL);
   add_function("__intrinsic_memory_barrier_buffer",
-                _memory_barrier_intrinsic(compute_shader,
+                _memory_barrier_intrinsic(compute_shader_supported,
                                          ir_intrinsic_memory_barrier_buffer),
                NULL);
   add_function("__intrinsic_memory_barrier_image",
-                _memory_barrier_intrinsic(compute_shader,
+                _memory_barrier_intrinsic(compute_shader_supported,
                                          ir_intrinsic_memory_barrier_image),
                NULL);
   add_function("__intrinsic_memory_barrier_shared",
@@ -2967,15 +2973,15 @@ builtin_builder::create_builtins()
                NULL);
   add_function("memoryBarrierAtomicCounter",
                _memory_barrier("__intrinsic_memory_barrier_atomic_counter",
-                                compute_shader),
+                                compute_shader_supported),
                NULL);
   add_function("memoryBarrierBuffer",
                _memory_barrier("__intrinsic_memory_barrier_buffer",
-                                compute_shader),
+                                compute_shader_supported),
                NULL);
   add_function("memoryBarrierImage",
                _memory_barrier("__intrinsic_memory_barrier_image",
-                                compute_shader),
+                                compute_shader_supported),
                NULL);
   add_function("memoryBarrierShared",
                _memory_barrier("__intrinsic_memory_barrier_shared",
@@ -3563,9 +3569,17 @@ builtin_builder::_tanh(const glsl_type *type)
   ir_variable *x = in_var(type, "x");
   MAKE_SIG(type, v130, 1, x);

+   /* Clamp x to [-10, +10] to avoid precision problems.
+    * When x > 10, e^(-x) is so small relative to e^x that it gets flushed to
+    * zero in the computation e^x + e^(-x). The same happens in the other
+    * direction when x < -10.
+    */
+   ir_variable *t = body.make_temp(type, "tmp");
+   body.emit(assign(t, min2(max2(x, imm(-10.0f)), imm(10.0f))));
+
   /* (e^x - e^(-x)) / (e^x + e^(-x)) */
-   body.emit(ret(div(sub(exp(x), exp(neg(x))),
-                     add(exp(x), exp(neg(x))))));
+   body.emit(ret(div(sub(exp(t), exp(neg(t))),
+                     add(exp(t), exp(neg(t))))));

   return sig;
 }
--- a/src/compiler/glsl/cache.c
+++ b/src/compiler/glsl/cache.c
@@ -612,19 +612,18 @@ cache_put(struct program_cache *cache,

   p_atomic_add(cache->size, size);

+ done:
+   if (fd_final != -1)
+      close(fd_final);
   /* This close finally releases the flock, (now that the final dile
    * has been renamed into place and the size has been added).
    */
-   close(fd);
-   fd = -1;
-
- done:
+   if (fd != -1)
+      close(fd);
   if (filename_tmp)
      ralloc_free(filename_tmp);
   if (filename)
      ralloc_free(filename);
-   if (fd != -1)
-      close(fd);
 }

 void *
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -176,7 +176,7 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value);
         * (such as the <HASH> and <DEFINE> start conditions in the lexer). */
 %token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS
 %token PASTE
-%type <ival> INTEGER operator SPACE integer_constant
+%type <ival> INTEGER operator SPACE integer_constant version_constant
 %type <expression_value> expression
 %type <str> IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA
 %type <string_list> identifier_list
@@ -424,14 +424,14 @@ control_line_success:
 |	HASH_TOKEN ENDIF {
 		_glcpp_parser_skip_stack_pop (parser, & @1);
 	} NEWLINE
-|	HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
-		if (parser->version != 0) {
+|	HASH_TOKEN VERSION_TOKEN version_constant NEWLINE {
+		if (parser->version_set) {
 			glcpp_error(& @1, parser, "#version must appear on the first line");
 		}
 		_glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
 	}
-|	HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
-		if (parser->version != 0) {
+|	HASH_TOKEN VERSION_TOKEN version_constant IDENTIFIER NEWLINE {
+		if (parser->version_set) {
 			glcpp_error(& @1, parser, "#version must appear on the first line");
 		}
 		_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
@@ -470,6 +470,17 @@ integer_constant:
 		$$ = $1;
 	}

+version_constant:
+	INTEGER_STRING {
+	   /* Both octal and hexadecimal constants begin with 0. */
+	   if ($1[0] == '0' && $1[1] != '\0') {
+		glcpp_error(&@1, parser, "invalid #version \"%s\" (not a decimal constant)", $1);
+		$$ = 0;
+	   } else {
+		$$ = strtoll($1, NULL, 10);
+	   }
+	}
+
 expression:
 	integer_constant {
 		$$.value = $1;
@@ -1376,6 +1387,7 @@ glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api
   parser->state = state;
   parser->api = api;
   parser->version = 0;
+   parser->version_set = false;

   parser->has_new_line_number = 0;
   parser->new_line_number = 1;
@@ -2318,10 +2330,11 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
                                         const char *es_identifier,
                                         bool explicitly_set)
 {
-   if (parser->version != 0)
+   if (parser->version_set)
      return;

   parser->version = version;
+   parser->version_set = true;

   add_builtin_define (parser, "__VERSION__", version);

--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -207,6 +207,15 @@ struct glcpp_parser {
 	void *state;
 	gl_api api;
 	unsigned version;
+
+	/**
+	 * Has the #version been set?
+	 *
+	 * A separate flag is used because any possible sentinel value in
+	 * \c ::version could also be set by a #version line.
+	 */
+	bool version_set;
+
 	bool has_new_line_number;
 	int new_line_number;
 	bool has_new_source_number;
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -253,6 +253,10 @@ HASH		^{SPC}#{SPC}
 				    yylval->n = strtol(yytext, NULL, 10);
 				    return INTCONSTANT;
 				}
+<PP>0				{
+				    yylval->n = 0;
+				    return INTCONSTANT;
+				}
 <PP>\n				{ BEGIN 0; yylineno++; yycolumn = 0; return EOL; }
 <PP>.				{ return yytext[0]; }

--- a/src/compiler/glsl/glsl_symbol_table.cpp
+++ b/src/compiler/glsl/glsl_symbol_table.cpp
@@ -126,7 +126,7 @@ void glsl_symbol_table::pop_scope()

 bool glsl_symbol_table::name_declared_this_scope(const char *name)
 {
-   return _mesa_symbol_table_symbol_scope(table, -1, name) == 0;
+   return _mesa_symbol_table_symbol_scope(table, name) == 0;
 }

 bool glsl_symbol_table::add_variable(ir_variable *v)
@@ -152,7 +152,7 @@ bool glsl_symbol_table::add_variable(ir_variable *v)
 	 symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v);
 	 if (existing != NULL)
 	    entry->f = existing->f;
-	 int added = _mesa_symbol_table_add_symbol(table, -1, v->name, entry);
+	 int added = _mesa_symbol_table_add_symbol(table, v->name, entry);
 	 assert(added == 0);
 	 (void)added;
 	 return true;
@@ -162,13 +162,13 @@ bool glsl_symbol_table::add_variable(ir_variable *v)

   /* 1.20+ rules: */
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v);
-   return _mesa_symbol_table_add_symbol(table, -1, v->name, entry) == 0;
+   return _mesa_symbol_table_add_symbol(table, v->name, entry) == 0;
 }

 bool glsl_symbol_table::add_type(const char *name, const glsl_type *t)
 {
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(t);
-   return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+   return _mesa_symbol_table_add_symbol(table, name, entry) == 0;
 }

 bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i,
@@ -180,7 +180,7 @@ bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i,
      symbol_table_entry *entry =
         new(mem_ctx) symbol_table_entry(i, mode);
      bool add_interface_symbol_result =
-         _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+         _mesa_symbol_table_add_symbol(table, name, entry) == 0;
      assert(add_interface_symbol_result);
      return add_interface_symbol_result;
   } else {
@@ -199,7 +199,7 @@ bool glsl_symbol_table::add_function(ir_function *f)
      }
   }
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f);
-   return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0;
+   return _mesa_symbol_table_add_symbol(table, f->name, entry) == 0;
 }

 bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name,
@@ -213,13 +213,16 @@ bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name,
   symbol_table_entry *entry =
      new(mem_ctx) symbol_table_entry(default_specifier);

-   return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+   if (!get_entry(name))
+      return _mesa_symbol_table_add_symbol(table, name, entry) == 0;
+
+   return _mesa_symbol_table_replace_symbol(table, name, entry) == 0;
 }

 void glsl_symbol_table::add_global_function(ir_function *f)
 {
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f);
-   int added = _mesa_symbol_table_add_global_symbol(table, -1, f->name, entry);
+   int added = _mesa_symbol_table_add_global_symbol(table, f->name, entry);
   assert(added == 0);
   (void)added;
 }
@@ -261,7 +264,7 @@ int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
 symbol_table_entry *glsl_symbol_table::get_entry(const char *name)
 {
   return (symbol_table_entry *)
-      _mesa_symbol_table_find_symbol(table, -1, name);
+      _mesa_symbol_table_find_symbol(table, name);
 }

 void
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -832,6 +832,12 @@ public:

      unsigned implicit_sized_array:1;

+      /**
+       * Is this a non-patch TCS output / TES input array that was implicitly
+       * sized to gl_MaxPatchVertices?
+       */
+      unsigned tess_varying_implicit_sized_array:1;
+
      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
--- a/src/compiler/glsl/ir_array_refcount.cpp
+++ b/src/compiler/glsl/ir_array_refcount.cpp
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_array_refcount.cpp
+ *
+ * Provides a visitor which tracks referenced variables and array elements.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_array_refcount.h"
+#include "compiler/glsl_types.h"
+#include "util/hash_table.h"
+
+ir_array_refcount_visitor::ir_array_refcount_visitor()
+   : last_array_deref(0), derefs(0), num_derefs(0), derefs_size(0)
+{
+   this->mem_ctx = ralloc_context(NULL);
+   this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                      _mesa_key_pointer_equal);
+}
+
+static void
+free_entry(struct hash_entry *entry)
+{
+   ir_array_refcount_entry *ivre = (ir_array_refcount_entry *) entry->data;
+   delete ivre;
+}
+
+ir_array_refcount_visitor::~ir_array_refcount_visitor()
+{
+   ralloc_free(this->mem_ctx);
+   _mesa_hash_table_destroy(this->ht, free_entry);
+}
+
+ir_array_refcount_entry::ir_array_refcount_entry(ir_variable *var)
+   : var(var), is_referenced(false)
+{
+   num_bits = MAX2(1, var->type->arrays_of_arrays_size());
+   bits = new BITSET_WORD[BITSET_WORDS(num_bits)];
+   memset(bits, 0, BITSET_WORDS(num_bits) * sizeof(bits[0]));
+
+   /* Count the "depth" of the arrays-of-arrays. */
+   array_depth = 0;
+   for (const glsl_type *type = var->type;
+        type->is_array();
+        type = type->fields.array) {
+      array_depth++;
+   }
+}
+
+
+ir_array_refcount_entry::~ir_array_refcount_entry()
+{
+   delete [] bits;
+}
+
+
+void
+ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
+                                                        unsigned count)
+{
+   if (count != array_depth)
+      return;
+
+   mark_array_elements_referenced(dr, count, 1, 0);
+}
+
+void
+ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
+                                                        unsigned count,
+                                                        unsigned scale,
+                                                        unsigned linearized_index)
+{
+   /* Walk through the list of array dereferences in least- to
+    * most-significant order.  Along the way, accumulate the current
+    * linearized offset and the scale factor for each array-of-.
+    */
+   for (unsigned i = 0; i < count; i++) {
+      if (dr[i].index < dr[i].size) {
+         linearized_index += dr[i].index * scale;
+         scale *= dr[i].size;
+      } else {
+         /* For each element in the current array, update the count and
+          * offset, then recurse to process the remaining arrays.
+          *
+          * There is some inefficiency here if the last element in the
+          * array_deref_range list specifies the entire array.  In that case,
+          * the loop will make recursive calls with count == 0.  In the call,
+          * all that will happen is the bit will be set.
+          */
+         for (unsigned j = 0; j < dr[i].size; j++) {
+            mark_array_elements_referenced(&dr[i + 1],
+                                           count - (i + 1),
+                                           scale * dr[i].size,
+                                           linearized_index + (j * scale));
+         }
+
+         return;
+      }
+   }
+
+   BITSET_SET(bits, linearized_index);
+}
+
+ir_array_refcount_entry *
+ir_array_refcount_visitor::get_variable_entry(ir_variable *var)
+{
+   assert(var);
+
+   struct hash_entry *e = _mesa_hash_table_search(this->ht, var);
+   if (e)
+      return (ir_array_refcount_entry *)e->data;
+
+   ir_array_refcount_entry *entry = new ir_array_refcount_entry(var);
+   _mesa_hash_table_insert(this->ht, var, entry);
+
+   return entry;
+}
+
+
+array_deref_range *
+ir_array_refcount_visitor::get_array_deref()
+{
+   if ((num_derefs + 1) * sizeof(array_deref_range) > derefs_size) {
+      void *ptr = reralloc_size(mem_ctx, derefs, derefs_size + 4096);
+
+      if (ptr == NULL)
+         return NULL;
+
+      derefs_size += 4096;
+      derefs = (array_deref_range *)ptr;
+   }
+
+   array_deref_range *d = &derefs[num_derefs];
+   num_derefs++;
+
+   return d;
+}
+
+ir_visitor_status
+ir_array_refcount_visitor::visit_enter(ir_dereference_array *ir)
+{
+   /* It could also be a vector or a matrix.  Individual elements of vectors
+    * and matrices are not tracked, so bail.
+    */
+   if (!ir->array->type->is_array())
+      return visit_continue;
+
+   /* If this array dereference is a child of an array dereference that was
+    * already visited, just continue on.  Otherwise, for an arrays-of-arrays
+    * dereference like x[1][2][3][4], we'd process the [1][2][3][4] sequence,
+    * the [1][2][3] sequence, the [1][2] sequence, and the [1] sequence.  This
+    * ensures that we only process the full sequence.
+    */
+   if (last_array_deref && last_array_deref->array == ir) {
+      last_array_deref = ir;
+      return visit_continue;
+   }
+
+   last_array_deref = ir;
+
+   num_derefs = 0;
+
+   ir_rvalue *rv = ir;
+   while (rv->ir_type == ir_type_dereference_array) {
+      ir_dereference_array *const deref = rv->as_dereference_array();
+
+      assert(deref != NULL);
+      assert(deref->array->type->is_array());
+
+      ir_rvalue *const array = deref->array;
+      const ir_constant *const idx = deref->array_index->as_constant();
+      array_deref_range *const dr = get_array_deref();
+
+      dr->size = array->type->array_size();
+
+      if (idx != NULL) {
+         dr->index = idx->get_int_component(0);
+      } else {
+         /* An unsized array can occur at the end of an SSBO.  We can't track
+          * accesses to such an array, so bail.
+          */
+         if (array->type->array_size() == 0)
+            return visit_continue;
+
+         dr->index = dr->size;
+      }
+
+      rv = array;
+   }
+
+   ir_dereference_variable *const var_deref = rv->as_dereference_variable();
+
+   /* If the array being dereferenced is not a variable, bail.  At the very
+    * least, ir_constant and ir_dereference_record are possible.
+    */
+   if (var_deref == NULL)
+      return visit_continue;
+
+   ir_array_refcount_entry *const entry =
+      this->get_variable_entry(var_deref->var);
+
+   if (entry == NULL)
+      return visit_stop;
+
+   entry->mark_array_elements_referenced(derefs, num_derefs);
+
+   return visit_continue;
+}
+
+
+ir_visitor_status
+ir_array_refcount_visitor::visit(ir_dereference_variable *ir)
+{
+   ir_variable *const var = ir->variable_referenced();
+   ir_array_refcount_entry *entry = this->get_variable_entry(var);
+
+   entry->is_referenced = true;
+
+   return visit_continue;
+}
+
+
+ir_visitor_status
+ir_array_refcount_visitor::visit_enter(ir_function_signature *ir)
+{
+   /* We don't want to descend into the function parameters and
+    * dead-code eliminate them, so just accept the body here.
+    */
+   visit_list_elements(this, &ir->body);
+   return visit_continue_with_parent;
+}
--- a/src/compiler/glsl/ir_array_refcount.h
+++ b/src/compiler/glsl/ir_array_refcount.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_array_refcount.h
+ *
+ * Provides a visitor which tracks referenced variables and array elements.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "compiler/glsl_types.h"
+#include "util/bitset.h"
+
+/**
+ * Describes an access of an array element or an access of the whole array
+ */
+struct array_deref_range {
+   /**
+    * Index that was accessed.
+    *
+    * All valid array indices are less than the size of the array.  If index
+    * is equal to the size of the array, this means the entire array has been
+    * accessed (e.g., due to use of a non-constant index).
+    */
+   unsigned index;
+
+   /** Size of the array.  Used for offset calculations. */
+   unsigned size;
+};
+
+class ir_array_refcount_entry
+{
+public:
+   ir_array_refcount_entry(ir_variable *var);
+   ~ir_array_refcount_entry();
+
+   ir_variable *var; /* The key: the variable's pointer. */
+
+   /** Has the variable been referenced? */
+   bool is_referenced;
+
+   /**
+    * Mark a set of array elements as accessed.
+    *
+    * If every \c array_deref_range is for a single index, only a single
+    * element will be marked.  If any \c array_deref_range is for an entire
+    * array-of-, then multiple elements will be marked.
+    *
+    * Items in the \c array_deref_range list appear in least- to
+    * most-significant order.  This is the \b opposite order the indices
+    * appear in the GLSL shader text.  An array access like
+    *
+    *     x = y[1][i][3];
+    *
+    * would appear as
+    *
+    *     { { 3, n }, { m, m }, { 1, p } }
+    *
+    * where n, m, and p are the sizes of the arrays-of-arrays.
+    *
+    * The set of marked array elements can later be queried by
+    * \c ::is_linearized_index_referenced.
+    *
+    * \param dr     List of array_deref_range elements to be processed.
+    * \param count  Number of array_deref_range elements to be processed.
+    */
+   void mark_array_elements_referenced(const array_deref_range *dr,
+                                       unsigned count);
+
+   /** Has element \c linearized_index (< \c num_bits) been referenced? */
+   bool is_linearized_index_referenced(unsigned linearized_index) const
+   {
+      assert(bits != 0);
+      assert(linearized_index < num_bits);
+
+      return BITSET_TEST(bits, linearized_index);
+   }
+
+private:
+   /** Set of bit-flags to note which array elements have been accessed. */
+   BITSET_WORD *bits;
+
+   /**
+    * Total number of bits referenced by \c bits.
+    *
+    * Also the total number of array(s-of-arrays) elements of \c var.
+    */
+   unsigned num_bits;
+
+   /** Count of nested arrays in the type. */
+   unsigned array_depth;
+
+   /**
+    * Recursive part of the public mark_array_elements_referenced method.
+    *
+    * The recursion occurs when an entire array-of- is accessed.  See the
+    * implementation for more details.
+    *
+    * \param dr                List of array_deref_range elements to be
+    *                          processed.
+    * \param count             Number of array_deref_range elements to be
+    *                          processed.
+    * \param scale             Current offset scale.
+    * \param linearized_index  Current accumulated linearized array index.
+    */
+   void mark_array_elements_referenced(const array_deref_range *dr,
+                                       unsigned count,
+                                       unsigned scale,
+                                       unsigned linearized_index);
+
+   friend class array_refcount_test;
+};
+
+class ir_array_refcount_visitor : public ir_hierarchical_visitor {
+public:
+   ir_array_refcount_visitor(void);
+   ~ir_array_refcount_visitor(void);
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+
+   virtual ir_visitor_status visit_enter(ir_function_signature *);
+   virtual ir_visitor_status visit_enter(ir_dereference_array *);
+
+   /**
+    * Find variable in the hash table, and insert it if not present
+    */
+   ir_array_refcount_entry *get_variable_entry(ir_variable *var);
+
+   /**
+    * Hash table mapping ir_variable to ir_array_refcount_entry.
+    */
+   struct hash_table *ht;
+
+   void *mem_ctx;
+
+private:
+   /** Get an array_deref_range element from private tracking. */
+   array_deref_range *get_array_deref();
+
+   /**
+    * Last ir_dereference_array that was visited
+    *
+    * Used to prevent some redundant calculations.
+    *
+    * \sa ::visit_enter(ir_dereference_array *)
+    */
+   ir_dereference_array *last_array_deref;
+
+   /**
+    * \name array_deref_range tracking
+    */
+   /*@{*/
+   /** Currently allocated block of derefs. */
+   array_deref_range *derefs;
+
+   /** Number of derefs used in current processing. */
+   unsigned num_derefs;
+
+   /** Size of the derefs buffer in bytes. */
+   unsigned derefs_size;
+   /*@}*/
+};
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -30,7 +30,7 @@

 /* Operations for lower_instructions() */
 #define SUB_TO_ADD_NEG     0x01
-#define DIV_TO_MUL_RCP     0x02
+#define FDIV_TO_MUL_RCP    0x02
 #define EXP_TO_EXP2        0x04
 #define POW_TO_EXP2        0x08
 #define LOG_TO_LOG2        0x10
@@ -49,6 +49,8 @@
 #define FIND_LSB_TO_FLOAT_CAST    0x20000
 #define FIND_MSB_TO_FLOAT_CAST    0x40000
 #define IMUL_HIGH_TO_MUL          0x80000
+#define DDIV_TO_MUL_RCP           0x100000
+#define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)

 /**
 * \see class lower_packing_builtins_visitor
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -130,14 +130,14 @@ ir_print_visitor::unique_name(ir_variable *var)

   /* If there's no conflict, just use the original name */
   const char* name = NULL;
-   if (_mesa_symbol_table_find_symbol(this->symbols, -1, var->name) == NULL) {
+   if (_mesa_symbol_table_find_symbol(this->symbols, var->name) == NULL) {
      name = var->name;
   } else {
      static unsigned i = 1;
      name = ralloc_asprintf(this->mem_ctx, "%s@%u", var->name, ++i);
   }
   _mesa_hash_table_insert(this->printable_names, var, (void *) name);
-   _mesa_symbol_table_add_symbol(this->symbols, -1, name, var);
+   _mesa_symbol_table_add_symbol(this->symbols, name, var);
   return name;
 }

--- a/src/compiler/glsl/link_uniform_blocks.cpp
+++ b/src/compiler/glsl/link_uniform_blocks.cpp
@@ -214,67 +214,98 @@ struct block {
   bool has_instance_name;
 };

+static void process_block_array_leaf(char **name, gl_uniform_block *blocks,
+                                     ubo_visitor *parcel,
+                                     gl_uniform_buffer_variable *variables,
+                                     const struct link_uniform_block_active *const b,
+                                     unsigned *block_index,
+                                     unsigned *binding_offset,
+                                     unsigned linearized_index,
+                                     struct gl_context *ctx,
+                                     struct gl_shader_program *prog);
+
+/**
+ * Walk a (possibly nested) block array, processing each leaf element.
+ * \param first_index Value of \c block_index for the first element of the
+ *                    array.
+ */
 static void
 process_block_array(struct uniform_block_array_elements *ub_array, char **name,
                    size_t name_length, gl_uniform_block *blocks,
                    ubo_visitor *parcel, gl_uniform_buffer_variable *variables,
                    const struct link_uniform_block_active *const b,
                    unsigned *block_index, unsigned *binding_offset,
-                    struct gl_context *ctx, struct gl_shader_program *prog)
+                    struct gl_context *ctx, struct gl_shader_program *prog,
+                    unsigned first_index)
 {
-   if (ub_array) {
-      for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
-         size_t new_length = name_length;
+   for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
+      size_t new_length = name_length;

-         /* Append the subscript to the current variable name */
-         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]",
-                                      ub_array->array_elements[j]);
+      /* Append the subscript to the current variable name */
+      ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]",
+                                   ub_array->array_elements[j]);

+      if (ub_array->array) {
         process_block_array(ub_array->array, name, new_length, blocks,
                             parcel, variables, b, block_index,
-                             binding_offset, ctx, prog);
+                             binding_offset, ctx, prog, first_index);
+      } else {
+         process_block_array_leaf(name, blocks,
+                                  parcel, variables, b, block_index,
+                                  binding_offset, *block_index - first_index,
+                                  ctx, prog);
      }
-   } else {
-      unsigned i = *block_index;
-      const glsl_type *type =  b->type->without_array();
-
-      blocks[i].Name = ralloc_strdup(blocks, *name);
-      blocks[i].Uniforms = &variables[(*parcel).index];
-
-      /* The GL_ARB_shading_language_420pack spec says:
-       *
-       *     "If the binding identifier is used with a uniform block
-       *     instanced as an array then the first element of the array
-       *     takes the specified block binding and each subsequent
-       *     element takes the next consecutive uniform block binding
-       *     point."
-       */
-      blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0;
-
-      blocks[i].UniformBufferSize = 0;
-      blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
-
-      parcel->process(type, blocks[i].Name);
-
-      blocks[i].UniformBufferSize = parcel->buffer_size;
-
-      /* Check SSBO size is lower than maximum supported size for SSBO */
-      if (b->is_shader_storage &&
-          parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
-         linker_error(prog, "shader storage block `%s' has size %d, "
-                      "which is larger than than the maximum allowed (%d)",
-                      b->type->name,
-                      parcel->buffer_size,
-                      ctx->Const.MaxShaderStorageBlockSize);
-      }
-      blocks[i].NumUniforms =
-         (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
-
-      *block_index = *block_index + 1;
-      *binding_offset = *binding_offset + 1;
   }
 }

+static void
+process_block_array_leaf(char **name,
+                         gl_uniform_block *blocks,
+                         ubo_visitor *parcel, gl_uniform_buffer_variable *variables,
+                         const struct link_uniform_block_active *const b,
+                         unsigned *block_index, unsigned *binding_offset,
+                         unsigned linearized_index,
+                         struct gl_context *ctx, struct gl_shader_program *prog)
+{
+   unsigned i = *block_index;
+   const glsl_type *type =  b->type->without_array();
+
+   blocks[i].Name = ralloc_strdup(blocks, *name);
+   blocks[i].Uniforms = &variables[(*parcel).index];
+
+   /* The GL_ARB_shading_language_420pack spec says:
+    *
+    *     "If the binding identifier is used with a uniform block instanced as
+    *     an array then the first element of the array takes the specified
+    *     block binding and each subsequent element takes the next consecutive
+    *     uniform block binding point."
+    */
+   blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0;
+
+   blocks[i].UniformBufferSize = 0;
+   blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
+   blocks[i].linearized_array_index = linearized_index;
+
+   parcel->process(type, blocks[i].Name);
+
+   blocks[i].UniformBufferSize = parcel->buffer_size;
+
+   /* Check SSBO size is lower than maximum supported size for SSBO */
+   if (b->is_shader_storage &&
+       parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+      linker_error(prog, "shader storage block `%s' has size %d, "
+                   "which is larger than than the maximum allowed (%d)",
+                   b->type->name,
+                   parcel->buffer_size,
+                   ctx->Const.MaxShaderStorageBlockSize);
+   }
+   blocks[i].NumUniforms =
+      (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
+
+   *block_index = *block_index + 1;
+   *binding_offset = *binding_offset + 1;
+}
+
 /* This function resizes the array types of the block so that later we can use
 * this new size to correctly calculate the offest for indirect indexing.
 */
@@ -351,7 +382,8 @@ create_buffer_blocks(void *mem_ctx, struct gl_context *ctx,

            assert(b->has_instance_name);
            process_block_array(b->array, &name, name_length, blocks, &parcel,
-                                variables, b, &i, &binding_offset, ctx, prog);
+                                variables, b, &i, &binding_offset, ctx, prog,
+                                i);
            ralloc_free(name);
         } else {
            blocks[i].Name = ralloc_strdup(blocks, block_type->name);
--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -28,6 +28,7 @@
 #include "glsl_symbol_table.h"
 #include "program.h"
 #include "util/string_to_uint_map.h"
+#include "ir_array_refcount.h"

 /**
 * \file link_uniforms.cpp
@@ -544,7 +545,7 @@ private:
            const char *str_end;
            while((str_start = strchr(name_copy, '[')) &&
                  (str_end = strchr(name_copy, ']'))) {
-               memmove(str_start, str_end + 1, 1 + strlen(str_end));
+               memmove(str_start, str_end + 1, 1 + strlen(str_end + 1));
            }

            unsigned index = 0;
@@ -633,6 +634,8 @@ private:
         uniform->opaque[shader_type].index = this->next_subroutine;
         uniform->opaque[shader_type].active = true;

+         prog->_LinkedShaders[shader_type]->NumSubroutineUniforms++;
+
         /* Increment the subroutine index by 1 for non-arrays and by the
          * number of array elements for arrays.
          */
@@ -880,6 +883,15 @@ public:
   unsigned shader_shadow_samplers;
 };

+static bool
+variable_is_referenced(ir_array_refcount_visitor &v, ir_variable *var)
+{
+   ir_array_refcount_entry *const entry = v.get_variable_entry(var);
+
+   return entry->is_referenced;
+
+}
+
 /**
 * Walks the IR and update the references to uniform blocks in the
 * ir_variables to point at linked shader's list (previously, they
@@ -887,8 +899,13 @@ public:
 * shaders).
 */
 static void
-link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
+link_update_uniform_buffer_variables(struct gl_linked_shader *shader,
+                                     unsigned stage)
 {
+   ir_array_refcount_visitor v;
+
+   v.run(shader->ir);
+
   foreach_in_list(ir_instruction, node, shader->ir) {
      ir_variable *const var = node->as_variable();

@@ -898,7 +915,48 @@ link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
      assert(var->data.mode == ir_var_uniform ||
             var->data.mode == ir_var_shader_storage);

+      unsigned num_blocks = var->data.mode == ir_var_uniform ?
+         shader->NumUniformBlocks : shader->NumShaderStorageBlocks;
+      struct gl_uniform_block **blks = var->data.mode == ir_var_uniform ?
+         shader->UniformBlocks : shader->ShaderStorageBlocks;
+
      if (var->is_interface_instance()) {
+         const ir_array_refcount_entry *const entry = v.get_variable_entry(var);
+
+         if (entry->is_referenced) {
+            /* Since this is an interface instance, the instance type will be
+             * same as the array-stripped variable type.  If the variable type
+             * is an array, then the block names will be suffixed with [0]
+             * through [n-1].  Unlike for non-interface instances, there will
+             * not be structure types here, so the only name sentinel that we
+             * have to worry about is [.
+             */
+            assert(var->type->without_array() == var->get_interface_type());
+            const char sentinel = var->type->is_array() ? '[' : '\0';
+
+            const ptrdiff_t len = strlen(var->get_interface_type()->name);
+            for (unsigned i = 0; i < num_blocks; i++) {
+               const char *const begin = blks[i]->Name;
+               const char *const end = strchr(begin, sentinel);
+
+               if (end == NULL)
+                  continue;
+
+               if (len != (end - begin))
+                  continue;
+
+               /* Even when a match is found, do not "break" here.  This could
+                * be an array of instances, and all elements of the array need
+                * to be marked as referenced.
+                */
+               if (strncmp(begin, var->get_interface_type()->name, len) == 0 &&
+                   (!var->type->is_array() ||
+                    entry->is_linearized_index_referenced(blks[i]->linearized_array_index))) {
+                  blks[i]->stageref |= 1U << stage;
+               }
+            }
+         }
+
         var->data.location = 0;
         continue;
      }
@@ -913,11 +971,6 @@ link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
         sentinel = '[';
      }

-      unsigned num_blocks = var->data.mode == ir_var_uniform ?
-         shader->NumUniformBlocks : shader->NumShaderStorageBlocks;
-      struct gl_uniform_block **blks = var->data.mode == ir_var_uniform ?
-         shader->UniformBlocks : shader->ShaderStorageBlocks;
-
      const unsigned l = strlen(var->name);
      for (unsigned i = 0; i < num_blocks; i++) {
         for (unsigned j = 0; j < blks[i]->NumUniforms; j++) {
@@ -931,14 +984,17 @@ link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
               if ((ptrdiff_t) l != (end - begin))
                  continue;

-               if (strncmp(var->name, begin, l) == 0) {
-                  found = true;
-                  var->data.location = j;
-                  break;
-               }
-            } else if (!strcmp(var->name, blks[i]->Uniforms[j].Name)) {
-               found = true;
+               found = strncmp(var->name, begin, l) == 0;
+            } else {
+               found = strcmp(var->name, blks[i]->Uniforms[j].Name) == 0;
+            }
+
+            if (found) {
               var->data.location = j;
+
+               if (variable_is_referenced(v, var))
+                  blks[i]->stageref |= 1U << stage;
+
               break;
            }
         }
@@ -1260,7 +1316,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
      memset(sh->SamplerUnits, 0, sizeof(sh->SamplerUnits));
      memset(sh->ImageUnits, 0, sizeof(sh->ImageUnits));

-      link_update_uniform_buffer_variables(sh);
+      link_update_uniform_buffer_variables(sh, i);

      /* Reset various per-shader target counts.
       */
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -181,7 +181,43 @@ private:
 };


-class array_resize_visitor : public ir_hierarchical_visitor {
+/**
+ * A visitor helper that provides methods for updating the types of
+ * ir_dereferences.  Classes that update variable types (say, updating
+ * array sizes) will want to use this so that dereference types stay in sync.
+ */
+class deref_type_updater : public ir_hierarchical_visitor {
+public:
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      ir->type = ir->var->type;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
+   {
+      const glsl_type *const vt = ir->array->type;
+      if (vt->is_array())
+         ir->type = vt->fields.array;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
+   {
+      for (unsigned i = 0; i < ir->record->type->length; i++) {
+         const struct glsl_struct_field *field =
+            &ir->record->type->fields.structure[i];
+         if (strcmp(field->name, ir->field) == 0) {
+            ir->type = field->type;
+            break;
+         }
+      }
+      return visit_continue;
+   }
+};
+
+
+class array_resize_visitor : public deref_type_updater {
 public:
   unsigned num_vertices;
   gl_shader_program *prog;
@@ -240,24 +276,6 @@ public:

      return visit_continue;
   }
-
-   /* Dereferences of input variables need to be updated so that their type
-    * matches the newly assigned type of the variable they are accessing. */
-   virtual ir_visitor_status visit(ir_dereference_variable *ir)
-   {
-      ir->type = ir->var->type;
-      return visit_continue;
-   }
-
-   /* Dereferences of 2D input arrays need to be updated so that their type
-    * matches the newly assigned type of the array they are accessing. */
-   virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
-   {
-      const glsl_type *const vt = ir->array->type;
-      if (vt->is_array())
-         ir->type = vt->fields.array;
-      return visit_continue;
-   }
 };

 /**
@@ -1165,11 +1183,10 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
         if (stage_index != -1) {
            struct gl_linked_shader *sh = prog->_LinkedShaders[i];

-            blks[j].stageref |= (1 << i);
-
            struct gl_uniform_block **sh_blks = validate_ssbo ?
               sh->ShaderStorageBlocks : sh->UniformBlocks;

+            blks[j].stageref |= sh_blks[stage_index]->stageref;
            sh_blks[stage_index] = &blks[j];
         }
      }
@@ -1353,7 +1370,7 @@ move_non_declarations(exec_list *instructions, exec_node *last,
 * it inside that function leads to compiler warnings with some versions of
 * gcc.
 */
-class array_sizing_visitor : public ir_hierarchical_visitor {
+class array_sizing_visitor : public deref_type_updater {
 public:
   array_sizing_visitor()
      : mem_ctx(ralloc_context(NULL)),
@@ -2273,6 +2290,8 @@ update_array_sizes(struct gl_shader_program *prog)
         if (prog->_LinkedShaders[i] == NULL)
            continue;

+      bool types_were_updated = false;
+
      foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
         ir_variable *const var = node->as_variable();

@@ -2328,11 +2347,15 @@ update_array_sizes(struct gl_shader_program *prog)

            var->type = glsl_type::get_array_instance(var->type->fields.array,
                                                      size + 1);
-            /* FINISHME: We should update the types of array
-             * dereferences of this variable now.
-             */
+            types_were_updated = true;
         }
      }
+
+      /* Update the types of dereferences in case we changed any. */
+      if (types_were_updated) {
+         deref_type_updater v;
+         v.run(prog->_LinkedShaders[i]->ir);
+      }
   }
 }

@@ -3094,7 +3117,6 @@ link_calculate_subroutine_compat(struct gl_shader_program *prog)
         if (!uni)
            continue;

-         sh->NumSubroutineUniforms++;
         count = 0;
         if (sh->NumSubroutineFunctions == 0) {
            linker_error(prog, "subroutine uniform %s defined but no valid functions found\n", uni->type->name);
@@ -3574,6 +3596,7 @@ static gl_shader_variable *
 create_shader_variable(struct gl_shader_program *shProg,
                       const ir_variable *in,
                       const char *name, const glsl_type *type,
+                       const glsl_type *interface_type,
                       bool use_implicit_location, int location,
                       const glsl_type *outermost_struct_type)
 {
@@ -3631,7 +3654,7 @@ create_shader_variable(struct gl_shader_program *shProg,

   out->type = type;
   out->outermost_struct_type = outermost_struct_type;
-   out->interface_type = in->get_interface_type();
+   out->interface_type = interface_type;
   out->component = in->data.location_frac;
   out->index = in->data.index;
   out->patch = in->data.patch;
@@ -3643,8 +3666,21 @@ create_shader_variable(struct gl_shader_program *shProg,
   return out;
 }

+static const glsl_type *
+resize_to_max_patch_vertices(const struct gl_context *ctx,
+                             const glsl_type *type)
+{
+   if (!type)
+      return NULL;
+
+   return glsl_type::get_array_instance(type->fields.array,
+                                        ctx->Const.MaxPatchVertices);
+}
+
 static bool
-add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
+add_shader_variable(const struct gl_context *ctx,
+                    struct gl_shader_program *shProg,
+                    struct set *resource_set,
                    unsigned stage_mask,
                    GLenum programInterface, ir_variable *var,
                    const char *name, const glsl_type *type,
@@ -3673,7 +3709,7 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
      for (unsigned i = 0; i < type->length; i++) {
         const struct glsl_struct_field *field = &type->fields.structure[i];
         char *field_name = ralloc_asprintf(shProg, "%s.%s", name, field->name);
-         if (!add_shader_variable(shProg, resource_set,
+         if (!add_shader_variable(ctx, shProg, resource_set,
                                  stage_mask, programInterface,
                                  var, field_name, field->type,
                                  use_implicit_location, field_location,
@@ -3687,6 +3723,29 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
   }

   default: {
+      const glsl_type *interface_type = var->get_interface_type();
+
+      /* Unsized (non-patch) TCS output/TES input arrays are implicitly
+       * sized to gl_MaxPatchVertices.  Internally, we shrink them to a
+       * smaller size.
+       *
+       * This can cause trouble with SSO programs.  Since the TCS declares
+       * the number of output vertices, we can always shrink TCS output
+       * arrays.  However, the TES might not be linked with a TCS, in
+       * which case it won't know the size of the patch.  In other words,
+       * the TCS and TES may disagree on the (smaller) array sizes.  This
+       * can result in the resource names differing across stages, causing
+       * SSO validation failures and other cascading issues.
+       *
+       * Expanding the array size to the full gl_MaxPatchVertices fixes
+       * these issues.  It's also what program interface queries expect,
+       * as that is the official size of the array.
+       */
+      if (var->data.tess_varying_implicit_sized_array) {
+         type = resize_to_max_patch_vertices(ctx, type);
+         interface_type = resize_to_max_patch_vertices(ctx, interface_type);
+      }
+
      /* Issue #16 of the ARB_program_interface_query spec says:
       *
       * "* If a variable is a member of an interface block without an
@@ -3699,8 +3758,7 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
       */
      const char *prefixed_name = (var->data.from_named_ifc_block &&
                                   !is_gl_identifier(var->name))
-         ? ralloc_asprintf(shProg, "%s.%s", var->get_interface_type()->name,
-                           name)
+         ? ralloc_asprintf(shProg, "%s.%s", interface_type->name, name)
         : name;

      /* The ARB_program_interface_query spec says:
@@ -3711,6 +3769,7 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
       */
      gl_shader_variable *sha_v =
         create_shader_variable(shProg, var, prefixed_name, type,
+                                interface_type,
                                use_implicit_location, location,
                                outermost_struct_type);
      if (!sha_v)
@@ -3723,7 +3782,8 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
 }

 static bool
-add_interface_variables(struct gl_shader_program *shProg,
+add_interface_variables(const struct gl_context *ctx,
+                        struct gl_shader_program *shProg,
                        struct set *resource_set,
                        unsigned stage, GLenum programInterface)
 {
@@ -3774,7 +3834,7 @@ add_interface_variables(struct gl_shader_program *shProg,
         (stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
         (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out);

-      if (!add_shader_variable(shProg, resource_set,
+      if (!add_shader_variable(ctx, shProg, resource_set,
                               1 << stage, programInterface,
                               var, var->name, var->type, vs_input_or_fs_output,
                               var->data.location - loc_bias))
@@ -3784,7 +3844,9 @@ add_interface_variables(struct gl_shader_program *shProg,
 }

 static bool
-add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
+add_packed_varyings(const struct gl_context *ctx,
+                    struct gl_shader_program *shProg,
+                    struct set *resource_set,
                    int stage, GLenum type)
 {
   struct gl_linked_shader *sh = shProg->_LinkedShaders[stage];
@@ -3810,7 +3872,7 @@ add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
         if (type == iface) {
            const int stage_mask =
               build_stageref(shProg, var->name, var->data.mode);
-            if (!add_shader_variable(shProg, resource_set,
+            if (!add_shader_variable(ctx, shProg, resource_set,
                                     stage_mask,
                                     iface, var, var->name, var->type, false,
                                     var->data.location - VARYING_SLOT_VAR0))
@@ -3822,7 +3884,9 @@ add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
 }

 static bool
-add_fragdata_arrays(struct gl_shader_program *shProg, struct set *resource_set)
+add_fragdata_arrays(const struct gl_context *ctx,
+                    struct gl_shader_program *shProg,
+                    struct set *resource_set)
 {
   struct gl_linked_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT];

@@ -3834,7 +3898,7 @@ add_fragdata_arrays(struct gl_shader_program *shProg, struct set *resource_set)
      if (var) {
         assert(var->data.mode == ir_var_shader_out);

-         if (!add_shader_variable(shProg, resource_set,
+         if (!add_shader_variable(ctx, shProg, resource_set,
                                  1 << MESA_SHADER_FRAGMENT,
                                  GL_PROGRAM_OUTPUT, var, var->name, var->type,
                                  true, var->data.location - FRAG_RESULT_DATA0))
@@ -4093,24 +4157,24 @@ build_program_resource_list(struct gl_context *ctx,

   /* Program interface needs to expose varyings in case of SSO. */
   if (shProg->SeparateShader) {
-      if (!add_packed_varyings(shProg, resource_set,
+      if (!add_packed_varyings(ctx, shProg, resource_set,
                               input_stage, GL_PROGRAM_INPUT))
         return;

-      if (!add_packed_varyings(shProg, resource_set,
+      if (!add_packed_varyings(ctx, shProg, resource_set,
                               output_stage, GL_PROGRAM_OUTPUT))
         return;
   }

-   if (!add_fragdata_arrays(shProg, resource_set))
+   if (!add_fragdata_arrays(ctx, shProg, resource_set))
      return;

   /* Add inputs and outputs to the resource list. */
-   if (!add_interface_variables(shProg, resource_set,
+   if (!add_interface_variables(ctx, shProg, resource_set,
                                input_stage, GL_PROGRAM_INPUT))
      return;

-   if (!add_interface_variables(shProg, resource_set,
+   if (!add_interface_variables(ctx, shProg, resource_set,
                                output_stage, GL_PROGRAM_OUTPUT))
      return;

@@ -4743,14 +4807,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                   "type of shader\n");
   }

-   for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (prog->_LinkedShaders[i] != NULL) {
-         _mesa_delete_linked_shader(ctx, prog->_LinkedShaders[i]);
-      }
-
-      prog->_LinkedShaders[i] = NULL;
-   }
-
   /* Link all shaders for a particular stage and validate the result.
    */
   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
--- a/src/compiler/glsl/lower_blend_equation_advanced.cpp
+++ b/src/compiler/glsl/lower_blend_equation_advanced.cpp
@@ -308,12 +308,18 @@ calc_blend_result(ir_factory f,
   f.emit(assign(dst_alpha, swizzle_w(fb)));
   f.emit(if_tree(equal(dst_alpha, imm1(0)),
                     assign(dst_rgb, imm3(0)),
-                     assign(dst_rgb, div(swizzle_xyz(fb), dst_alpha))));
+                     assign(dst_rgb, csel(equal(swizzle_xyz(fb),
+                                                swizzle(fb, SWIZZLE_WWWW, 3)),
+                                          imm3(1),
+                                          div(swizzle_xyz(fb), dst_alpha)))));

   f.emit(assign(src_alpha, swizzle_w(src)));
   f.emit(if_tree(equal(src_alpha, imm1(0)),
                     assign(src_rgb, imm3(0)),
-                     assign(src_rgb, div(swizzle_xyz(src), src_alpha))));
+                     assign(src_rgb, csel(equal(swizzle_xyz(src),
+                                                swizzle(src, SWIZZLE_WWWW, 3)),
+                                          imm3(1),
+                                          div(swizzle_xyz(src), src_alpha)))));

   ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");

--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -54,8 +54,8 @@
 * want to recognize add(op0, neg(op1)) or the other way around to
 * produce a subtract anyway.
 *
- * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
- * --------------------------------------
+ * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
+ * ---------------------------------------------------------
 * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
 *
 * Many GPUs don't have a divide instruction (945 and 965 included),
@@ -63,9 +63,11 @@
 * reciprocal.  By breaking the operation down, constant reciprocals
 * can get constant folded.
 *
- * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
- * handles the integer case, converting to and from floating point so that
- * RCP is possible.
+ * FDIV_TO_MUL_RCP only lowers single-precision floating point division;
+ * DDIV_TO_MUL_RCP only lowers double-precision floating point division.
+ * DIV_TO_MUL_RCP is a convenience macro that sets both flags.
+ * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
+ * point so that RCP is possible.
 *
 * EXP_TO_EXP2 and LOG_TO_LOG2:
 * ----------------------------
@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
   /* Don't generate new IR that would need to be lowered in an additional
    * pass.
    */
-   if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+   if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
+       (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
      div_to_mul_rcp(div_expr);

   ir_expression *const floor_expr =
@@ -1588,8 +1591,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
   case ir_binop_div:
      if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
 	 int_div_to_mul_rcp(ir);
-      else if ((ir->operands[1]->type->is_float() ||
-                ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
+      else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
+               (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
 	 div_to_mul_rcp(ir);
      break;

--- a/src/compiler/glsl/lower_named_interface_blocks.cpp
+++ b/src/compiler/glsl/lower_named_interface_blocks.cpp
@@ -193,6 +193,8 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
            new_var->data.patch = iface_t->fields.structure[i].patch;
            new_var->data.stream = var->data.stream;
            new_var->data.how_declared = var->data.how_declared;
+            new_var->data.tess_varying_implicit_sized_array =
+               var->data.tess_varying_implicit_sized_array;
            new_var->data.from_named_ifc_block = 1;

            new_var->init_interface_type(var->type);
--- a/src/compiler/glsl/lower_output_reads.cpp
+++ b/src/compiler/glsl/lower_output_reads.cpp
@@ -157,7 +157,6 @@ ir_visitor_status
 output_read_remover::visit_leave(ir_emit_vertex *ir)
 {
   hash_table_call_foreach(replacements, emit_return_copy, ir);
-   _mesa_hash_table_clear(replacements, NULL);
   return visit_continue;
 }

--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -107,7 +107,6 @@ public:

   struct gl_linked_shader *shader;
   bool clamp_block_indices;
-   struct gl_uniform_buffer_variable *ubo_var;
   const struct glsl_struct_field *struct_field;
   ir_variable *variable;
   ir_rvalue *uniform_block;
@@ -308,8 +307,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
            this->uniform_block = index;
         }

-         this->ubo_var = var->is_interface_instance()
-            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];
+         if (var->is_interface_instance()) {
+            *const_offset = 0;
+         } else {
+            *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
+         }

         break;
      }
@@ -317,8 +319,6 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,

   assert(this->uniform_block);

-   *const_offset = ubo_var->Offset;
-
   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
--- a/src/compiler/glsl/opt_function_inlining.cpp
+++ b/src/compiler/glsl/opt_function_inlining.cpp
@@ -128,7 +128,7 @@ ir_call::generate_inline(ir_instruction *next_ir)
 	 parameters[i] = NULL;
      } else {
 	 parameters[i] = sig_param->clone(ctx, ht);
-	 parameters[i]->data.mode = ir_var_auto;
+	 parameters[i]->data.mode = ir_var_temporary;

 	 /* Remove the read-only decoration because we're going to write
 	  * directly to this variable.  If the cloned variable is left
--- a/src/compiler/glsl/opt_minmax.cpp
+++ b/src/compiler/glsl/opt_minmax.cpp
@@ -355,7 +355,7 @@ ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
          */
         if (!is_redundant && limits[i].low && baserange.high) {
            cr = compare_components(limits[i].low, baserange.high);
-            if (cr >= EQUAL && cr != MIXED)
+            if (cr > EQUAL && cr != MIXED)
               is_redundant = true;
         }
      } else {
@@ -373,7 +373,7 @@ ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
          */
         if (!is_redundant && limits[i].high && baserange.low) {
            cr = compare_components(limits[i].high, baserange.low);
-            if (cr <= EQUAL)
+            if (cr < EQUAL)
               is_redundant = true;
         }
      }
--- a/src/compiler/glsl/standalone.cpp
+++ b/src/compiler/glsl/standalone.cpp
@@ -421,7 +421,7 @@ standalone_compile_shader(const struct standalone_options *_options,
   }

   if ((status == EXIT_SUCCESS) && options->do_link)  {
-      _mesa_clear_shader_program_data(whole_program);
+      _mesa_clear_shader_program_data(ctx, whole_program);

      link_shaders(ctx, whole_program);
      status = (whole_program->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE;
--- a/src/compiler/glsl/standalone_scaffolding.cpp
+++ b/src/compiler/glsl/standalone_scaffolding.cpp
@@ -123,8 +123,16 @@ _mesa_delete_linked_shader(struct gl_context *ctx,
 }

 void
-_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
+_mesa_clear_shader_program_data(struct gl_context *ctx,
+                                struct gl_shader_program *shProg)
 {
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (shProg->_LinkedShaders[i] != NULL) {
+         _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[i]);
+         shProg->_LinkedShaders[i] = NULL;
+      }
+   }
+
   shProg->NumUniformStorage = 0;
   shProg->UniformStorage = NULL;
   shProg->NumUniformRemapTable = 0;
--- a/src/compiler/glsl/standalone_scaffolding.h
+++ b/src/compiler/glsl/standalone_scaffolding.h
@@ -56,7 +56,8 @@ _mesa_delete_linked_shader(struct gl_context *ctx,
                           struct gl_linked_shader *sh);

 extern "C" void
-_mesa_clear_shader_program_data(struct gl_shader_program *);
+_mesa_clear_shader_program_data(struct gl_context *ctx,
+                                struct gl_shader_program *);

 extern "C" void
 _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
--- a/src/compiler/glsl/tests/array_refcount_test.cpp
+++ b/src/compiler/glsl/tests/array_refcount_test.cpp
@@ -0,0 +1,717 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "ir.h"
+#include "ir_array_refcount.h"
+#include "ir_builder.h"
+#include "util/hash_table.h"
+
+using namespace ir_builder;
+
+class array_refcount_test : public ::testing::Test {
+public:
+   virtual void SetUp();
+   virtual void TearDown();
+
+   exec_list instructions;
+   ir_factory *body;
+   void *mem_ctx;
+
+   /**
+    * glsl_type for a vec4[3][4][5].
+    *
+    * The exceptionally verbose name is picked because it matches the syntax
+    * of http://cdecl.org/.
+    */
+   const glsl_type *array_3_of_array_4_of_array_5_of_vec4;
+
+   /**
+    * glsl_type for an int[3].
+    *
+    * The exceptionally verbose name is picked because it matches the syntax
+    * of http://cdecl.org/.
+    */
+   const glsl_type *array_3_of_int;
+
+   /**
+    * Wrapper to access private member "bits" of ir_array_refcount_entry
+    *
+    * The test class is a friend to ir_array_refcount_entry, but the
+    * individual tests are not part of the class.  Since the friendliness of
+    * the test class does not extend to the tests, provide a wrapper.
+    */
+   const BITSET_WORD *get_bits(const ir_array_refcount_entry &entry)
+   {
+      return entry.bits;
+   }
+
+   /**
+    * Wrapper to access private member "num_bits" of ir_array_refcount_entry
+    *
+    * The test class is a friend to ir_array_refcount_entry, but the
+    * individual tests are not part of the class.  Since the friendliness of
+    * the test class does not extend to the tests, provide a wrapper.
+    */
+   unsigned get_num_bits(const ir_array_refcount_entry &entry)
+   {
+      return entry.num_bits;
+   }
+
+   /**
+    * Wrapper to access private member "array_depth" of ir_array_refcount_entry
+    *
+    * The test class is a friend to ir_array_refcount_entry, but the
+    * individual tests are not part of the class.  Since the friendliness of
+    * the test class does not extend to the tests, provide a wrapper.
+    */
+   unsigned get_array_depth(const ir_array_refcount_entry &entry)
+   {
+      return entry.array_depth;
+   }
+};
+
+void
+array_refcount_test::SetUp()
+{
+   mem_ctx = ralloc_context(NULL);
+
+   instructions.make_empty();
+   body = new ir_factory(&instructions, mem_ctx);
+
+   /* The type of vec4 x[3][4][5]; */
+   const glsl_type *const array_5_of_vec4 =
+      glsl_type::get_array_instance(glsl_type::vec4_type, 5);
+   const glsl_type *const array_4_of_array_5_of_vec4 =
+      glsl_type::get_array_instance(array_5_of_vec4, 4);
+   array_3_of_array_4_of_array_5_of_vec4 =
+      glsl_type::get_array_instance(array_4_of_array_5_of_vec4, 3);
+
+   array_3_of_int = glsl_type::get_array_instance(glsl_type::int_type, 3);
+}
+
+void
+array_refcount_test::TearDown()
+{
+   delete body;
+   body = NULL;
+
+   ralloc_free(mem_ctx);
+   mem_ctx = NULL;
+}
+
+static operand
+deref_array(operand array, operand index)
+{
+   void *mem_ctx = ralloc_parent(array.val);
+
+   ir_rvalue *val = new(mem_ctx) ir_dereference_array(array.val, index.val);
+
+   return operand(val);
+}
+
+static operand
+deref_struct(operand s, const char *field)
+{
+   void *mem_ctx = ralloc_parent(s.val);
+
+   ir_rvalue *val = new(mem_ctx) ir_dereference_record(s.val, field);
+
+   return operand(val);
+}
+
+/**
+ * Verify that only the specified set of ir_variables exists in the hash table
+ */
+static void
+validate_variables_in_hash_table(struct hash_table *ht,
+                                 unsigned count,
+                                 ...)
+{
+   ir_variable **vars = new ir_variable *[count];
+   va_list args;
+
+   /* Make a copy of the list of expected ir_variables.  The copied list can
+    * be modified during the checking.
+    */
+   va_start(args, count);
+
+   for (unsigned i = 0; i < count; i++)
+      vars[i] = va_arg(args, ir_variable *);
+
+   va_end(args);
+
+   struct hash_entry *entry;
+   hash_table_foreach(ht, entry) {
+      const ir_instruction *const ir = (ir_instruction *) entry->key;
+      const ir_variable *const v = ir->as_variable();
+
+      if (v == NULL) {
+         ADD_FAILURE() << "Invalid junk in hash table: ir_type = "
+                       << ir->ir_type << ", address = "
+                       << (void *) ir;
+         continue;
+      }
+
+      unsigned i;
+      for (i = 0; i < count; i++) {
+         if (vars[i] == NULL)
+            continue;
+
+         if (vars[i] == v)
+            break;
+      }
+
+      if (i == count) {
+            ADD_FAILURE() << "Invalid variable in hash table: \""
+                          << v->name << "\"";
+      } else {
+         /* As each variable is encountered, remove it from the set.  Don't
+          * bother compacting the set because we don't care about
+          * performance here.
+          */
+         vars[i] = NULL;
+      }
+   }
+
+   /* Check that there's nothing left in the set. */
+   for (unsigned i = 0; i < count; i++) {
+      if (vars[i] != NULL) {
+         ADD_FAILURE() << "Variable was not in the hash table: \""
+                          << vars[i]->name << "\"";
+      }
+   }
+
+   delete [] vars;
+}
+
+TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_scalar)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(glsl_type::int_type, "a", ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   ASSERT_NE((void *)0, get_bits(entry));
+   EXPECT_FALSE(entry.is_referenced);
+   EXPECT_EQ(1, get_num_bits(entry));
+   EXPECT_EQ(0, get_array_depth(entry));
+   EXPECT_FALSE(entry.is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_vector)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(glsl_type::vec4_type, "a", ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   ASSERT_NE((void *)0, get_bits(entry));
+   EXPECT_FALSE(entry.is_referenced);
+   EXPECT_EQ(1, get_num_bits(entry));
+   EXPECT_EQ(0, get_array_depth(entry));
+   EXPECT_FALSE(entry.is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_matrix)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(glsl_type::mat4_type, "a", ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   ASSERT_NE((void *)0, get_bits(entry));
+   EXPECT_FALSE(entry.is_referenced);
+   EXPECT_EQ(1, get_num_bits(entry));
+   EXPECT_EQ(0, get_array_depth(entry));
+   EXPECT_FALSE(entry.is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_array)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                               "a",
+                               ir_var_auto);
+   const unsigned total_elements = var->type->arrays_of_arrays_size();
+
+   ir_array_refcount_entry entry(var);
+
+   ASSERT_NE((void *)0, get_bits(entry));
+   EXPECT_FALSE(entry.is_referenced);
+   EXPECT_EQ(total_elements, get_num_bits(entry));
+   EXPECT_EQ(3, get_array_depth(entry));
+
+   for (unsigned i = 0; i < total_elements; i++)
+      EXPECT_FALSE(entry.is_linearized_index_referenced(i)) << "index = " << i;
+}
+
+TEST_F(array_refcount_test, mark_array_elements_referenced_simple)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                               "a",
+                               ir_var_auto);
+   const unsigned total_elements = var->type->arrays_of_arrays_size();
+
+   ir_array_refcount_entry entry(var);
+
+   static const array_deref_range dr[] = {
+      { 0, 5 }, { 1, 4 }, { 2, 3 }
+   };
+   const unsigned accessed_element = 0 + (1 * 5) + (2 * 4 * 5);
+
+   entry.mark_array_elements_referenced(dr, 3);
+
+   for (unsigned i = 0; i < total_elements; i++)
+      EXPECT_EQ(i == accessed_element, entry.is_linearized_index_referenced(i));
+}
+
+TEST_F(array_refcount_test, mark_array_elements_referenced_whole_first_array)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                               "a",
+                               ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   static const array_deref_range dr[] = {
+      { 0, 5 }, { 1, 4 }, { 3, 3 }
+   };
+
+   entry.mark_array_elements_referenced(dr, 3);
+
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (j == 1) && (k == 0);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry.is_linearized_index_referenced(linearized_index));
+         }
+      }
+   }
+}
+
+TEST_F(array_refcount_test, mark_array_elements_referenced_whole_second_array)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                               "a",
+                               ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   static const array_deref_range dr[] = {
+      { 0, 5 }, { 4, 4 }, { 1, 3 }
+   };
+
+   entry.mark_array_elements_referenced(dr, 3);
+
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (i == 1) && (k == 0);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry.is_linearized_index_referenced(linearized_index));
+         }
+      }
+   }
+}
+
+TEST_F(array_refcount_test, mark_array_elements_referenced_whole_third_array)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                               "a",
+                               ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   static const array_deref_range dr[] = {
+      { 5, 5 }, { 2, 4 }, { 1, 3 }
+   };
+
+   entry.mark_array_elements_referenced(dr, 3);
+
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (i == 1) && (j == 2);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry.is_linearized_index_referenced(linearized_index));
+         }
+      }
+   }
+}
+
+TEST_F(array_refcount_test, mark_array_elements_referenced_whole_first_and_third_arrays)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                               "a",
+                               ir_var_auto);
+
+   ir_array_refcount_entry entry(var);
+
+   static const array_deref_range dr[] = {
+      { 5, 5 }, { 3, 4 }, { 3, 3 }
+   };
+
+   entry.mark_array_elements_referenced(dr, 3);
+
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (j == 3);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry.is_linearized_index_referenced(linearized_index));
+         }
+      }
+   }
+}
+
+TEST_F(array_refcount_test, do_not_process_vector_indexing)
+{
+   /* Vectors and matrices can also be indexed in much the same manner as
+    * arrays.  The visitor should not try to track per-element accesses to
+    * these types.
+    */
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::float_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_c = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "c",
+                                                 ir_var_auto);
+
+   body->emit(assign(var_a, deref_array(var_c, var_b)));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+   ir_array_refcount_entry *entry_c = v.get_variable_entry(var_c);
+
+   EXPECT_TRUE(entry_a->is_referenced);
+   EXPECT_TRUE(entry_b->is_referenced);
+   EXPECT_TRUE(entry_c->is_referenced);
+
+   /* As validated by previous tests, for non-array types, num_bits is 1. */
+   ASSERT_EQ(1, get_num_bits(*entry_c));
+   EXPECT_FALSE(entry_c->is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, do_not_process_matrix_indexing)
+{
+   /* Vectors and matrices can also be indexed in much the same manner as
+    * arrays.  The visitor should not try to track per-element accesses to
+    * these types.
+    */
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_c = new(mem_ctx) ir_variable(glsl_type::mat4_type,
+                                                 "c",
+                                                 ir_var_auto);
+
+   body->emit(assign(var_a, deref_array(var_c, var_b)));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+   ir_array_refcount_entry *entry_c = v.get_variable_entry(var_c);
+
+   EXPECT_TRUE(entry_a->is_referenced);
+   EXPECT_TRUE(entry_b->is_referenced);
+   EXPECT_TRUE(entry_c->is_referenced);
+
+   /* As validated by previous tests, for non-array types, num_bits is 1. */
+   ASSERT_EQ(1, get_num_bits(*entry_c));
+   EXPECT_FALSE(entry_c->is_linearized_index_referenced(0));
+}
+
+TEST_F(array_refcount_test, do_not_process_array_inside_structure)
+{
+   /* Structures can contain arrays.  The visitor should not try to track
+    * per-element accesses to arrays contained inside structures.
+    */
+   const glsl_struct_field fields[] = {
+      glsl_struct_field(array_3_of_int, "i"),
+   };
+
+   const glsl_type *const record_of_array_3_of_int =
+      glsl_type::get_record_instance(fields, ARRAY_SIZE(fields), "S");
+
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "a",
+                                                 ir_var_auto);
+
+   ir_variable *var_b = new(mem_ctx) ir_variable(record_of_array_3_of_int,
+                                                 "b",
+                                                 ir_var_auto);
+
+   /* a = b.i[2] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_struct(var_b, "i"),
+                        body->constant(int(2)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+
+   EXPECT_TRUE(entry_a->is_referenced);
+   EXPECT_TRUE(entry_b->is_referenced);
+
+   ASSERT_EQ(1, get_num_bits(*entry_b));
+   EXPECT_FALSE(entry_b->is_linearized_index_referenced(0));
+
+   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
+}
+
+TEST_F(array_refcount_test, visit_simple_indexing)
+{
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                                                 "b",
+                                                 ir_var_auto);
+
+   /* a = b[2][1][0] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, body->constant(int(2))),
+                           body->constant(int(1))),
+                        body->constant(int(0)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   const unsigned accessed_element = 0 + (1 * 5) + (2 * 4 * 5);
+   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
+   const unsigned total_elements = var_b->type->arrays_of_arrays_size();
+
+   for (unsigned i = 0; i < total_elements; i++)
+      EXPECT_EQ(i == accessed_element, entry_b->is_linearized_index_referenced(i)) <<
+         "i = " << i;
+
+   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
+}
+
+TEST_F(array_refcount_test, visit_whole_second_array_indexing)
+{
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_i = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "i",
+                                                 ir_var_auto);
+
+   /* a = b[2][i][1] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, body->constant(int(2))),
+                           var_i),
+                        body->constant(int(1)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (i == 2) && (k == 1);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry_b->is_linearized_index_referenced(linearized_index)) <<
+               "i = " << i;
+         }
+      }
+   }
+
+   validate_variables_in_hash_table(v.ht, 3, var_a, var_b, var_i);
+}
+
+TEST_F(array_refcount_test, visit_array_indexing_an_array)
+{
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
+                                                 "b",
+                                                 ir_var_auto);
+   ir_variable *var_c = new(mem_ctx) ir_variable(array_3_of_int,
+                                                 "c",
+                                                 ir_var_auto);
+   ir_variable *var_i = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "i",
+                                                 ir_var_auto);
+
+   /* a = b[2][3][c[i]] */
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, body->constant(int(2))),
+                           body->constant(int(3))),
+                        deref_array(var_c, var_i))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
+
+   for (unsigned i = 0; i < 3; i++) {
+      for (unsigned j = 0; j < 4; j++) {
+         for (unsigned k = 0; k < 5; k++) {
+            const bool accessed = (i == 2) && (j == 3);
+            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);
+
+            EXPECT_EQ(accessed,
+                      entry_b->is_linearized_index_referenced(linearized_index)) <<
+               "array b[" << i << "][" << j << "][" << k << "], " <<
+               "linear index = " << linearized_index;
+         }
+      }
+   }
+
+   ir_array_refcount_entry *const entry_c = v.get_variable_entry(var_c);
+
+   for (unsigned i = 0; i < var_c->type->array_size(); i++) {
+      EXPECT_EQ(true, entry_c->is_linearized_index_referenced(i)) <<
+         "array c, i = " << i;
+   }
+
+   validate_variables_in_hash_table(v.ht, 4, var_a, var_b, var_c, var_i);
+}
+
+TEST_F(array_refcount_test, visit_array_indexing_with_itself)
+{
+   const glsl_type *const array_2_of_array_3_of_int =
+      glsl_type::get_array_instance(array_3_of_int, 2);
+
+   const glsl_type *const array_2_of_array_2_of_array_3_of_int =
+      glsl_type::get_array_instance(array_2_of_array_3_of_int, 2);
+
+   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::int_type,
+                                                 "a",
+                                                 ir_var_auto);
+   ir_variable *var_b = new(mem_ctx) ir_variable(array_2_of_array_2_of_array_3_of_int,
+                                                 "b",
+                                                 ir_var_auto);
+
+   /* Given GLSL code:
+    *
+    *    int b[2][2][3];
+    *    a = b[ b[0][0][0] ][ b[ b[0][1][0] ][ b[1][0][0] ][1] ][2]
+    *
+    * b[0][0][0], b[0][1][0], and b[1][0][0] are trivially accessed.
+    *
+    * b[*][*][1] and b[*][*][2] are accessed.
+    *
+    * Only b[1][1][0] is not accessed.
+    */
+   operand b000 = deref_array(
+      deref_array(
+         deref_array(var_b, body->constant(int(0))),
+         body->constant(int(0))),
+      body->constant(int(0)));
+
+   operand b010 = deref_array(
+      deref_array(
+         deref_array(var_b, body->constant(int(0))),
+         body->constant(int(1))),
+      body->constant(int(0)));
+
+   operand b100 = deref_array(
+      deref_array(
+         deref_array(var_b, body->constant(int(1))),
+         body->constant(int(0))),
+      body->constant(int(0)));
+
+   operand b_b010_b100_1 = deref_array(
+      deref_array(
+         deref_array(var_b, b010),
+         b100),
+      body->constant(int(1)));
+
+   body->emit(assign(var_a,
+                     deref_array(
+                        deref_array(
+                           deref_array(var_b, b000),
+                           b_b010_b100_1),
+                        body->constant(int(2)))));
+
+   ir_array_refcount_visitor v;
+
+   visit_list_elements(&v, &instructions);
+
+   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
+
+   for (unsigned i = 0; i < 2; i++) {
+      for (unsigned j = 0; j < 2; j++) {
+         for (unsigned k = 0; k < 3; k++) {
+            const bool accessed = !(i == 1 && j == 1 && k == 0);
+            const unsigned linearized_index = k + (j * 3) + (i * 2 * 3);
+
+            EXPECT_EQ(accessed,
+                      entry_b->is_linearized_index_referenced(linearized_index)) <<
+               "array b[" << i << "][" << j << "][" << k << "], " <<
+               "linear index = " << linearized_index;
+         }
+      }
+   }
+
+   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
+}
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2625,6 +2625,8 @@ bool nir_opt_remove_phis(nir_shader *shader);

 bool nir_opt_undef(nir_shader *shader);

+bool nir_opt_conditional_discard(nir_shader *shader);
+
 void nir_sweep(nir_shader *shader);

 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
--- a/src/compiler/nir/nir_lower_wpos_ytransform.c
+++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
@@ -272,6 +272,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state *state,
                                                     flip_y)));
 }

+static void
+lower_load_sample_pos(lower_wpos_ytransform_state *state,
+                      nir_intrinsic_instr *intr)
+{
+   nir_builder *b = &state->b;
+   b->cursor = nir_after_instr(&intr->instr);
+
+   nir_ssa_def *pos = &intr->dest.ssa;
+   nir_ssa_def *scale = nir_channel(b, get_transform(state), 0);
+   nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2);
+   /* Either y or 1-y for scale equal to 1 or -1 respectively. */
+   nir_ssa_def *flipped_y =
+               nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)),
+                        nir_fmul(b, nir_channel(b, pos, 1), scale));
+   nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y);
+
+   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos),
+                                  flipped_pos->parent_instr);
+}
+
 static void
 lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block)
 {
@@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block
               /* gl_FragCoord should not have array/struct deref's: */
               assert(dvar->deref.child == NULL);
               lower_fragcoord(state, intr);
+            } else if (var->data.mode == nir_var_system_value &&
+                       var->data.location == SYSTEM_VALUE_SAMPLE_POS) {
+               assert(dvar->deref.child == NULL);
+               lower_load_sample_pos(state, intr);
            }
         } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) {
            lower_interp_var_at_offset(state, intr);
--- a/src/compiler/nir/nir_opt_conditional_discard.c
+++ b/src/compiler/nir/nir_opt_conditional_discard.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_opt_conditional_discard.c
+ *
+ * Optimizes "if (cond) discard" control flow into a single discard_if(cond).
+ */
+
+static bool
+nir_opt_conditional_discard_block(nir_block *block, void *mem_ctx)
+{
+   nir_builder bld;
+
+   if (nir_cf_node_is_first(&block->cf_node))
+      return false;
+
+   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
+   if (prev_node->type != nir_cf_node_if)
+      return false;
+
+   nir_if *if_stmt = nir_cf_node_as_if(prev_node);
+   nir_block *then_block = nir_if_first_then_block(if_stmt);
+   nir_block *else_block = nir_if_first_else_block(if_stmt);
+
+   /* check there is only one else block and it is empty */
+   if (nir_if_last_else_block(if_stmt) != else_block)
+      return false;
+   if (!exec_list_is_empty(&else_block->instr_list))
+      return false;
+
+   /* check there is only one then block and it has only one instruction in it */
+   if (nir_if_last_then_block(if_stmt) != then_block)
+      return false;
+   if (exec_list_is_empty(&then_block->instr_list))
+      return false;
+   if (exec_list_length(&then_block->instr_list) > 1)
+      return false;
+   /*
+    * make sure no later phi takes a source from this if's then/else block.
+    */
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+   nir_foreach_instr_safe(instr, after) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_foreach_phi_src(phi_src, phi) {
+         if (phi_src->pred == then_block ||
+             phi_src->pred == else_block)
+            return false;
+      }
+   }
+
+   /* Get the first instruction in the then block and confirm it is
+    * a discard or a discard_if
+    */
+   nir_instr *instr = nir_block_first_instr(then_block);
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_discard &&
+       intrin->intrinsic != nir_intrinsic_discard_if)
+      return false;
+
+   nir_src cond;
+
+   nir_builder_init(&bld, mem_ctx);
+   bld.cursor = nir_before_cf_node(prev_node);
+   if (intrin->intrinsic == nir_intrinsic_discard)
+      cond = if_stmt->condition;
+   else
+      cond = nir_src_for_ssa(nir_iand(&bld,
+                                      nir_ssa_for_src(&bld, if_stmt->condition, 1),
+                                      nir_ssa_for_src(&bld, intrin->src[0], 1)));
+
+   nir_intrinsic_instr *discard_if =
+      nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_discard_if);
+   nir_src_copy(&discard_if->src[0], &cond, discard_if);
+
+   nir_instr_insert_before_cf(prev_node, &discard_if->instr);
+   nir_instr_remove(&intrin->instr);
+   nir_cf_node_remove(&if_stmt->cf_node);
+
+   return true;
+}
+
+bool
+nir_opt_conditional_discard(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl) {
+         void *mem_ctx = ralloc_parent(function->impl);
+         nir_foreach_block_safe(block, function->impl) {
+            progress |= nir_opt_conditional_discard_block(block, mem_ctx);
+         }
+      }
+   }
+   return progress;
+}
--- a/src/compiler/nir/nir_opt_undef.c
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -86,17 +86,15 @@ opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)

   assert(alu->dest.dest.is_ssa);

-   unsigned num_components = nir_op_infos[alu->op].num_inputs;
-
-   for (unsigned i = 0; i < num_components; i++) {
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (!alu->src[i].src.is_ssa ||
          alu->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
         return false;
   }

   b->cursor = nir_before_instr(&alu->instr);
-   nir_ssa_def *undef =
-      nir_ssa_undef(b, num_components, nir_dest_bit_size(alu->dest.dest));
+   nir_ssa_def *undef = nir_ssa_undef(b, alu->dest.dest.ssa.num_components,
+                                      nir_dest_bit_size(alu->dest.dest));
   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(undef));

   return true;
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -98,6 +98,16 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
 {
   uint8_t new_swizzle[4];

+   /* Searching only works on SSA values because, if it's not SSA, we can't
+    * know if the value changed between one instance of that value in the
+    * expression and another.  Also, the replace operation will place reads of
+    * that value right before the last instruction in the expression we're
+    * replacing so those reads will happen after the original reads and may
+    * not be valid if they're register reads.
+    */
+   if (!instr->src[src].src.is_ssa)
+      return false;
+
   /* If the source is an explicitly sized source, then we need to reset
    * both the number of components and the swizzle.
    */
@@ -116,9 +126,6 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,

   switch (value->type) {
   case nir_search_value_expression:
-      if (!instr->src[src].src.is_ssa)
-         return false;
-
      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
         return false;

@@ -131,8 +138,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);

      if (state->variables_seen & (1 << var->variable)) {
-         if (!nir_srcs_equal(state->variables[var->variable].src,
-                             instr->src[src].src))
+         if (state->variables[var->variable].src.ssa != instr->src[src].src.ssa)
            return false;

         assert(!instr->src[src].abs && !instr->src[src].negate);
@@ -204,43 +210,27 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
         return true;

      case nir_type_int:
-         for (unsigned i = 0; i < num_components; ++i) {
-            int64_t val;
-            switch (load->def.bit_size) {
-            case 32:
-               val = load->value.i32[new_swizzle[i]];
-               break;
-            case 64:
-               val = load->value.i64[new_swizzle[i]];
-               break;
-            default:
-               unreachable("unknown bit size");
-            }
-
-            if (val != const_val->data.i)
-               return false;
-         }
-         return true;
-
      case nir_type_uint:
      case nir_type_bool32:
-         for (unsigned i = 0; i < num_components; ++i) {
-            uint64_t val;
-            switch (load->def.bit_size) {
-            case 32:
-               val = load->value.u32[new_swizzle[i]];
-               break;
-            case 64:
-               val = load->value.u64[new_swizzle[i]];
-               break;
-            default:
-               unreachable("unknown bit size");
+         switch (load->def.bit_size) {
+         case 32:
+            for (unsigned i = 0; i < num_components; ++i) {
+               if (load->value.u32[new_swizzle[i]] !=
+                   (uint32_t)const_val->data.u)
+                  return false;
            }
+            return true;

-            if (val != const_val->data.u)
-               return false;
+         case 64:
+            for (unsigned i = 0; i < num_components; ++i) {
+               if (load->value.u64[new_swizzle[i]] != const_val->data.u)
+                  return false;
+            }
+            return true;
+
+         default:
+            unreachable("unknown bit size");
         }
-         return true;

      default:
         unreachable("Invalid alu source type");
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1055,16 +1055,30 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
      SpvOp opcode = get_specialization(b, val, w[3]);
      switch (opcode) {
      case SpvOpVectorShuffle: {
-         struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
-         struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
-         unsigned len0 = glsl_get_vector_elements(v0->const_type);
-         unsigned len1 = glsl_get_vector_elements(v1->const_type);
+         struct vtn_value *v0 = &b->values[w[4]];
+         struct vtn_value *v1 = &b->values[w[5]];
+
+         assert(v0->value_type == vtn_value_type_constant ||
+                v0->value_type == vtn_value_type_undef);
+         assert(v1->value_type == vtn_value_type_constant ||
+                v1->value_type == vtn_value_type_undef);
+
+         unsigned len0 = v0->value_type == vtn_value_type_constant ?
+                         glsl_get_vector_elements(v0->const_type) :
+                         glsl_get_vector_elements(v0->type->type);
+         unsigned len1 = v1->value_type == vtn_value_type_constant ?
+                         glsl_get_vector_elements(v1->const_type) :
+                         glsl_get_vector_elements(v1->type->type);

         uint32_t u[8];
-         for (unsigned i = 0; i < len0; i++)
-            u[i] = v0->constant->value.u[i];
-         for (unsigned i = 0; i < len1; i++)
-            u[len0 + i] = v1->constant->value.u[i];
+         if (v0->value_type == vtn_value_type_constant) {
+            for (unsigned i = 0; i < len0; i++)
+               u[i] = v0->constant->value.u[i];
+         }
+         if (v1->value_type == vtn_value_type_constant) {
+            for (unsigned i = 0; i < len1; i++)
+               u[len0 + i] = v1->constant->value.u[i];
+         }

         for (unsigned i = 0; i < count - 6; i++) {
            uint32_t comp = w[i + 6];
@@ -2707,6 +2721,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
      vtn_handle_constant(b, opcode, w, count);
      break;

+   case SpvOpUndef:
   case SpvOpVariable:
      vtn_handle_variables(b, opcode, w, count);
      break;
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -527,12 +527,13 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
   nir_variable *phi_var = phi_entry->data;

   for (unsigned i = 3; i < count; i += 2) {
-      struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
      struct vtn_block *pred =
         vtn_value(b, w[i + 1], vtn_value_type_block)->block;

      b->nb.cursor = nir_after_instr(&pred->end_nop->instr);

+      struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
+
      vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
   }

--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -565,16 +565,21 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
                                   build_exp(nb, nir_fneg(nb, src[0]))));
      return;

-   case GLSLstd450Tanh:
-      /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
-      val->ssa->def =
-         nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
-                                   nir_fsub(nb, build_exp(nb, src[0]),
-                                                build_exp(nb, nir_fneg(nb, src[0])))),
-                      nir_fmul(nb, nir_imm_float(nb, 0.5f),
-                                   nir_fadd(nb, build_exp(nb, src[0]),
-                                                build_exp(nb, nir_fneg(nb, src[0])))));
+   case GLSLstd450Tanh: {
+      /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
+       *
+       * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
+       *
+       * We clamp x to (-inf, +10] to avoid precision problems.  When x > 10,
+       * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
+       * computation e^2x +/- 1 so it can be ignored.
+       */
+      nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
+      nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
+      val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
+                                   nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
      return;
+   }

   case GLSLstd450Asinh:
      val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -805,8 +805,12 @@ vtn_get_builtin_location(struct vtn_builder *b,
      set_mode_system_value(mode);
      break;
   case SpvBuiltInPrimitiveId:
-      *location = VARYING_SLOT_PRIMITIVE_ID;
-      *mode = nir_var_shader_out;
+      if (*mode == nir_var_shader_out) {
+         *location = VARYING_SLOT_PRIMITIVE_ID;
+      } else {
+         *location = SYSTEM_VALUE_PRIMITIVE_ID;
+         set_mode_system_value(mode);
+      }
      break;
   case SpvBuiltInInvocationId:
      *location = SYSTEM_VALUE_INVOCATION_ID;
@@ -1054,7 +1058,8 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
         is_vertex_input = false;
         location += VARYING_SLOT_VAR0;
      } else {
-         unreachable("Location must be on input or output variable");
+         vtn_warn("Location must be on input or output variable");
+         return;
      }

      if (vtn_var->var) {
@@ -1154,6 +1159,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
                     const uint32_t *w, unsigned count)
 {
   switch (opcode) {
+   case SpvOpUndef: {
+      struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
+      val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
+      break;
+   }
+
   case SpvOpVariable: {
      struct vtn_variable *var = rzalloc(b, struct vtn_variable);
      var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -95,8 +95,8 @@ AM_CFLAGS += \
 	-I$(top_srcdir)/src/egl/drivers/dri2 \
 	-I$(top_srcdir)/src/gbm/backends/dri \
 	-I$(top_srcdir)/src/egl/wayland/wayland-egl \
-	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
 	-I$(top_builddir)/src/egl/wayland/wayland-drm \
+	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
 	-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
 	-D_EGL_BUILT_IN_DRIVER_DRI2

--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -241,6 +241,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
            return NULL;
         break;

+      case __DRI_ATTRIB_MAX_PBUFFER_WIDTH:
+         _eglSetConfigKey(&base, EGL_MAX_PBUFFER_WIDTH,
+                          _EGL_MAX_PBUFFER_WIDTH);
+         break;
+      case __DRI_ATTRIB_MAX_PBUFFER_HEIGHT:
+         _eglSetConfigKey(&base, EGL_MAX_PBUFFER_HEIGHT,
+                          _EGL_MAX_PBUFFER_HEIGHT);
+         break;
+
      default:
         key = dri2_to_egl_attribute_map[attrib];
         if (key != 0)
@@ -1077,6 +1086,20 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
   if (!_eglInitContext(&dri2_ctx->base, disp, conf, attrib_list))
      goto cleanup;

+   /* The EGL_EXT_create_context_robustness spec says:
+    *
+    *    "Add to the eglCreateContext context creation errors: [...]
+    *
+    *     * If the reset notification behavior of <share_context> and the
+    *       newly created context are different then an EGL_BAD_MATCH error is
+    *       generated."
+    */
+   if (share_list && share_list->ResetNotificationStrategy !=
+                     dri2_ctx->base.ResetNotificationStrategy) {
+      _eglError(EGL_BAD_MATCH, "eglCreateContext");
+      goto cleanup;
+   }
+
   switch (dri2_ctx->base.ClientAPI) {
   case EGL_OPENGL_ES_API:
      switch (dri2_ctx->base.ClientMajorVersion) {
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -80,8 +80,6 @@
 #include "eglimage.h"
 #include "eglsync.h"

-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
 struct wl_buffer;

 struct dri2_egl_driver
--- a/src/egl/drivers/dri2/egl_dri2_fallbacks.h
+++ b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
@@ -66,7 +66,8 @@ dri2_fallback_swap_buffers_with_damage(_EGLDriver *drv, _EGLDisplay *dpy,
                                      _EGLSurface *surf,
                                      const EGLint *rects, EGLint n_rects)
 {
-   return EGL_FALSE;
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+   return dri2_dpy->vtbl->swap_buffers(drv, dpy, surf);
 }

 static inline EGLBoolean
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -766,8 +766,6 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
     EGL_NATIVE_VISUAL_TYPE, 0,
     EGL_FRAMEBUFFER_TARGET_ANDROID, EGL_TRUE,
     EGL_RECORDABLE_ANDROID, EGL_TRUE,
-     EGL_MAX_PBUFFER_WIDTH, _EGL_MAX_PBUFFER_WIDTH,
-     EGL_MAX_PBUFFER_HEIGHT, _EGL_MAX_PBUFFER_HEIGHT,
     EGL_NONE
   };
   unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -118,6 +118,13 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
   (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);
 }

+static void
+destroy_window_callback(void *data)
+{
+   struct dri2_egl_surface *dri2_surf = data;
+   dri2_surf->wl_win = NULL; /* so dri2_wl_destroy_surface() skips the window */
+}
+
 /**
 * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
 */
@@ -159,6 +166,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,

   dri2_surf->wl_win->private = dri2_surf;
   dri2_surf->wl_win->resize_callback = resize_callback;
+   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;

   dri2_surf->base.Width =  -1;
   dri2_surf->base.Height = -1;
@@ -254,8 +262,11 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
   if (dri2_surf->throttle_callback)
      wl_callback_destroy(dri2_surf->throttle_callback);

-   dri2_surf->wl_win->private = NULL;
-   dri2_surf->wl_win->resize_callback = NULL;
+   if (dri2_surf->wl_win) {
+      dri2_surf->wl_win->private = NULL;
+      dri2_surf->wl_win->resize_callback = NULL;
+      dri2_surf->wl_win->destroy_window_callback = NULL;
+   }

   free(surf);

@@ -1070,6 +1081,7 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = {

 static const __DRIextension *dri2_loader_extensions[] = {
   &dri2_loader_extension.base,
+   &image_loader_extension.base,
   &image_lookup_extension.base,
   &use_invalidate.base,
   NULL,
@@ -1272,6 +1284,8 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
 cleanup_registry:
   wl_registry_destroy(dri2_dpy->wl_registry);
   wl_event_queue_destroy(dri2_dpy->wl_queue);
+   if (disp->PlatformDisplay == NULL)
+      wl_display_disconnect(dri2_dpy->wl_dpy);
 cleanup_dpy:
   free(dri2_dpy);
   disp->DriverData = NULL;
@@ -1731,6 +1745,8 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
      dri2_surf->format = WL_SHM_FORMAT_ARGB8888;

   dri2_surf->wl_win = window;
+   dri2_surf->wl_win->private = dri2_surf;
+   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;

   dri2_surf->base.Width = -1;
   dri2_surf->base.Height = -1;
@@ -1913,6 +1929,8 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
 cleanup_registry:
   wl_registry_destroy(dri2_dpy->wl_registry);
   wl_event_queue_destroy(dri2_dpy->wl_queue);
+   if (disp->PlatformDisplay == NULL)
+      wl_display_disconnect(dri2_dpy->wl_dpy);
 cleanup_dpy:
   free(dri2_dpy);
   disp->DriverData = NULL;
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -438,6 +438,25 @@ dri3_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
   return loader_dri3_query_buffer_age(&dri3_surf->loader_drawable);
 }

+static EGLBoolean
+dri3_query_surface(_EGLDriver *drv, _EGLDisplay *dpy,
+                   _EGLSurface *surf, EGLint attribute,
+                   EGLint *value)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+
+   switch (attribute) {
+   case EGL_WIDTH:
+   case EGL_HEIGHT: /* size queries update the drawable geometry first */
+      loader_dri3_update_drawable_geometry(&dri3_surf->loader_drawable);
+      break;
+   default: /* every other attribute needs no extra work here */
+      break;
+   }
+
+   return _eglQuerySurface(drv, dpy, surf, attribute, value);
+}
+
 static __DRIdrawable *
 dri3_get_dri_drawable(_EGLSurface *surf)
 {
@@ -460,6 +479,7 @@ struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
   .post_sub_buffer = dri2_fallback_post_sub_buffer,
   .copy_buffers = dri3_copy_buffers,
   .query_buffer_age = dri3_query_buffer_age,
+   .query_surface = dri3_query_surface,
   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
   .get_sync_values = dri3_get_sync_values,
   .get_dri_drawable = dri3_get_dri_drawable,
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -734,7 +734,9 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list,

   _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);

-   if (!config && !disp->Extensions.KHR_no_config_context)
+   if (config != EGL_NO_CONFIG_KHR)
+      _EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv);
+   else if (!disp->Extensions.KHR_no_config_context)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT);

   if (!share && share_list != EGL_NO_CONTEXT)
@@ -847,7 +849,7 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
      RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE);

 #ifdef HAVE_SURFACELESS_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+   if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
      /* From the EGL_MESA_platform_surfaceless spec (v1):
       *
       *    eglCreatePlatformWindowSurface fails when called with a <display>
@@ -866,6 +868,9 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config,

   _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);

+   if ((conf->SurfaceType & EGL_WINDOW_BIT) == 0)
+      RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+
   surf = drv->API.CreateWindowSurface(drv, disp, conf, native_window,
                                       attrib_list);
   ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
@@ -968,7 +973,7 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
   EGLSurface ret;

 #if HAVE_SURFACELESS_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+   if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
      /* From the EGL_MESA_platform_surfaceless spec (v1):
       *
       *   [Like eglCreatePlatformWindowSurface,] eglCreatePlatformPixmapSurface
@@ -984,6 +989,10 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
 #endif

   _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
+
+   if ((conf->SurfaceType & EGL_PIXMAP_BIT) == 0)
+      RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+
   surf = drv->API.CreatePixmapSurface(drv, disp, conf, native_pixmap,
                                       attrib_list);
   ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
@@ -1054,6 +1063,9 @@ eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
   _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE);
   _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);

+   if ((conf->SurfaceType & EGL_PBUFFER_BIT) == 0)
+      RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+
   surf = drv->API.CreatePbufferSurface(drv, disp, conf, attrib_list);
   ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;

@@ -2382,7 +2394,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context,
   return MESA_GLINTEROP_SUCCESS;
 }

-int
+PUBLIC int
 MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
                                struct mesa_glinterop_device_info *out)
 {
@@ -2404,7 +2416,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
   return ret;
 }

-int
+PUBLIC int
 MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
                             struct mesa_glinterop_export_in *in,
                             struct mesa_glinterop_export_out *out)
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -184,19 +184,33 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
            break;
         }

-         /* The EGL_KHR_create_context_spec says:
-          *
-          *     "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
-          *     EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
-          *     access> will be created. Robust buffer access is defined in the
-          *     GL_ARB_robustness extension specification, and the resulting
-          *     context must also support either the GL_ARB_robustness
-          *     extension, or a version of OpenGL incorporating equivalent
-          *     functionality. This bit is supported for OpenGL contexts.
-          */
         if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
-             (api != EGL_OPENGL_API ||
-              !dpy->Extensions.EXT_create_context_robustness)) {
+             api != EGL_OPENGL_API) {
+            /* The EGL_KHR_create_context spec says:
+             *
+             *   10) Which error should be generated if robust buffer access
+             *       or reset notifications are requested under OpenGL ES?
+             *
+             *       As per Issue 6, this extension does not support creating
+             *       robust contexts for OpenGL ES. This is only supported via
+             *       the EGL_EXT_create_context_robustness extension.
+             *
+             *       Attempting to use this extension to create robust OpenGL
+             *       ES context will generate an EGL_BAD_ATTRIBUTE error. This
+             *       specific error is generated because this extension does
+             *       not define the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR
+             *       and EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR
+             *       bits for OpenGL ES contexts. Thus, use of these bits fall
+             *       under condition described by: "If an attribute is
+             *       specified that is not meaningful for the client API
+             *       type.." in the above specification.
+             *
+             * The spec requires that we emit the error even if the display
+             * supports EGL_EXT_create_context_robustness. To create a robust
+             * GLES context, the *attribute*
+             * EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT must be used, not the
+             * *flag* EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR.
+             */
            err = EGL_BAD_ATTRIBUTE;
            break;
         }
--- a/src/egl/main/egldefines.h
+++ b/src/egl/main/egldefines.h
@@ -34,6 +34,8 @@
 #ifndef EGLDEFINES_INCLUDED
 #define EGLDEFINES_INCLUDED

+#include "util/macros.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -48,9 +50,6 @@ extern "C" {

 #define _EGL_VENDOR_STRING "Mesa Project"

-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-#define MIN2(A, B)  (((A) < (B)) ? (A) : (B))
-
 #ifdef __cplusplus
 }
 #endif
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -262,9 +262,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
 {
   const char *func;
   EGLint renderBuffer = EGL_BACK_BUFFER;
-   EGLint swapBehavior = EGL_BUFFER_PRESERVED;
+   EGLint swapBehavior = EGL_BUFFER_DESTROYED;
   EGLint err;

+   /* Swap behavior can be preserved only if config supports this. */
+   if (conf->SurfaceType & EGL_SWAP_BEHAVIOR_PRESERVED_BIT)
+      swapBehavior = EGL_BUFFER_PRESERVED;
+
   switch (type) {
   case EGL_WINDOW_BIT:
      func = "eglCreateWindowSurface";
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -188,7 +188,9 @@ cso_insert_state(struct cso_cache *sc,
                 void *state)
 {
   struct cso_hash *hash = _cso_hash_for_type(sc, type);
-   sanitize_hash(sc, hash, type, sc->max_size);
+
+   if (type != CSO_SAMPLER)
+      sanitize_hash(sc, hash, type, sc->max_size);

   return cso_hash_insert(hash, hash_key, state);
 }
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1275,7 +1275,6 @@ cso_restore_fragment_samplers(struct cso_context *ctx)
 {
   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];

-   info->nr_samplers = ctx->nr_fragment_samplers_saved;
   memcpy(info->samplers, ctx->fragment_samplers_saved,
          sizeof(info->samplers));
   cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -606,7 +606,10 @@ gallivm_compile_module(struct gallivm_state *gallivm)
      util_snprintf(filename, sizeof(filename), "ir_%s.bc", gallivm->module_name);
      LLVMWriteBitcodeToFile(gallivm->module, filename);
      debug_printf("%s written\n", filename);
-      debug_printf("Invoke as \"llc -o - %s\"\n", filename);
+      debug_printf("Invoke as \"llc %s%s -o - %s\"\n",
+                   (HAVE_LLVM >= 0x0305) ? "[-mcpu=<-mcpu option>] " : "",
+                   "[-mattr=<-mattr option(s)>]",
+                   filename); /* the bitcode file written just above */
   }

   if (USE_MCJIT) {
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -98,6 +98,7 @@
 #include "util/u_cpu_detect.h"

 #include "lp_bld_misc.h"
+#include "lp_bld_debug.h"

 namespace {

@@ -596,7 +597,8 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,

 #if defined(PIPE_ARCH_PPC)
   MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
-#if HAVE_LLVM >= 0x0304
+#if (HAVE_LLVM >= 0x0304)
+#if (HAVE_LLVM <= 0x0307) || (HAVE_LLVM == 0x0308 && MESA_LLVM_VERSION_PATCH == 0)
   /*
    * Make sure VSX instructions are disabled
    * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=25503#c7
@@ -604,11 +606,32 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
   if (util_cpu_caps.has_altivec) {
      MAttrs.push_back("-vsx");
   }
+#else
+   /*
+    * However, bug 25503 is fixed, by the same fix that fixed
+    * bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1):
+    * Make sure VSX instructions are ENABLED
+    * See LLVM bug https://llvm.org/bugs/show_bug.cgi?id=26775
+    */
+   if (util_cpu_caps.has_altivec) {
+      MAttrs.push_back("+vsx");
+   }
+#endif
 #endif
 #endif

   builder.setMAttrs(MAttrs);

+   if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
+      int n = MAttrs.size();
+      if (n > 0) {
+         debug_printf("llc -mattr option(s): ");
+         for (int i = 0; i < n; i++)
+            debug_printf("%s%s", MAttrs[i].c_str(), (i < n - 1) ? "," : "");
+         debug_printf("\n");
+      }
+   }
+
 #if HAVE_LLVM >= 0x0305
   StringRef MCPU = llvm::sys::getHostCPUName();
   /*
@@ -623,7 +646,23 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
    * when not using MCJIT so no instructions are generated which the old JIT
    * can't handle. Not entirely sure if we really need to do anything yet.
    */
+#if defined(PIPE_ARCH_LITTLE_ENDIAN)  && defined(PIPE_ARCH_PPC_64)
+   /*
+    * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
+    * resulting in (big-endian) "generic" being returned on
+    * little-endian Power8NVL systems.  The result was that code that
+    * attempted to load the least significant 32 bits of a 64-bit quantity
+    * from memory loaded the wrong half.  This resulted in failures in some
+    * Piglit tests, e.g.
+    * .../arb_gpu_shader_fp64/execution/conversion/frag-conversion-explicit-double-uint
+    */
+   if (MCPU == "generic")
+      MCPU = "pwr8";
+#endif
   builder.setMCPU(MCPU);
+   if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
+      debug_printf("llc -mcpu option: %s\n", MCPU.str().c_str());
+   }
 #endif

   ShaderMemoryManager *MM = NULL;
--- a/src/gallium/auxiliary/hud/hud_cpufreq.c
+++ b/src/gallium/auxiliary/hud/hud_cpufreq.c
@@ -36,6 +36,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -61,6 +62,7 @@ struct cpufreq_info

 static int gcpufreq_count = 0;
 static struct list_head gcpufreq_list;
+pipe_static_mutex(gcpufreq_mutex);

 static struct cpufreq_info *
 find_cfi_by_index(int cpu_index, int mode)
@@ -112,14 +114,6 @@ query_cfi_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct cpufreq_info *cfi = (struct cpufreq_info *)p;
-   list_del(&cfi->list);
-   FREE(cfi);
-}
-
 /**
  * Create and initialize a new object for a specific CPU.
  * \param  pane  parent context.
@@ -155,6 +149,7 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
      break;
   case CPUFREQ_MAXIMUM:
      snprintf(gr->name, sizeof(gr->name), "%s-Max", cfi->name);
+      break;
   default:
      return;
   }
@@ -162,11 +157,6 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
   gr->query_data = cfi;
   gr->query_new_value = query_cfi_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   hud_pane_set_max_value(pane, 3000000 /* 3 GHz */);
 }
@@ -199,16 +189,21 @@ hud_get_num_cpufreq(bool displayhelp)
   int cpu_index;

   /* Return the number of CPU metrics we support. */
-   if (gcpufreq_count)
+   pipe_mutex_lock(gcpufreq_mutex);
+   if (gcpufreq_count) {
+      pipe_mutex_unlock(gcpufreq_mutex);
      return gcpufreq_count;
+   }

   /* Scan /sys/devices.../cpu, for every object type we support, create
    * and persist an object to represent its different metrics.
    */
   list_inithead(&gcpufreq_list);
   DIR *dir = opendir("/sys/devices/system/cpu");
-   if (!dir)
+   if (!dir) {
+      pipe_mutex_unlock(gcpufreq_mutex);
      return 0;
+   }

   while ((dp = readdir(dir)) != NULL) {

@@ -238,6 +233,7 @@ hud_get_num_cpufreq(bool displayhelp)
      snprintf(fn, sizeof(fn), "%s/cpufreq/scaling_max_freq", basename);
      add_object(dp->d_name, fn, CPUFREQ_MAXIMUM, cpu_index);
   }
+   closedir(dir);

   if (displayhelp) {
      list_for_each_entry(struct cpufreq_info, cfi, &gcpufreq_list, list) {
@@ -251,6 +247,7 @@ hud_get_num_cpufreq(bool displayhelp)
      }
   }

+   pipe_mutex_unlock(gcpufreq_mutex);
   return gcpufreq_count;
 }

--- a/src/gallium/auxiliary/hud/hud_diskstat.c
+++ b/src/gallium/auxiliary/hud/hud_diskstat.c
@@ -35,6 +35,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -81,6 +82,7 @@ struct diskstat_info
 */
 static int gdiskstat_count = 0;
 static struct list_head gdiskstat_list;
+pipe_static_mutex(gdiskstat_mutex);

 static struct diskstat_info *
 find_dsi_by_name(const char *n, int mode)
@@ -162,14 +164,6 @@ query_dsi_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct diskstat_info *nic = (struct diskstat_info *) p;
-   list_del(&nic->list);
-   FREE(nic);
-}
-
 /**
  * Create and initialize a new object for a specific block I/O device.
  * \param  pane  parent context.
@@ -208,11 +202,6 @@ hud_diskstat_graph_install(struct hud_pane *pane, const char *dev_name,
   gr->query_data = dsi;
   gr->query_new_value = query_dsi_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   hud_pane_set_max_value(pane, 100);
 }
@@ -257,16 +246,21 @@ hud_get_num_disks(bool displayhelp)
   char name[64];

   /* Return the number of block devices and partitions. */
-   if (gdiskstat_count)
+   pipe_mutex_lock(gdiskstat_mutex);
+   if (gdiskstat_count) {
+      pipe_mutex_unlock(gdiskstat_mutex);
      return gdiskstat_count;
+   }

   /* Scan /sys/block, for every object type we support, create and
    * persist an object to represent its different statistics.
    */
   list_inithead(&gdiskstat_list);
   DIR *dir = opendir("/sys/block/");
-   if (!dir)
+   if (!dir) {
+      pipe_mutex_unlock(gdiskstat_mutex);
      return 0;
+   }

   while ((dp = readdir(dir)) != NULL) {

@@ -290,8 +284,11 @@ hud_get_num_disks(bool displayhelp)
      /* Add any partitions */
      struct dirent *dpart;
      DIR *pdir = opendir(basename);
-      if (!pdir)
+      if (!pdir) {
+         pipe_mutex_unlock(gdiskstat_mutex);
+         closedir(dir);
         return 0;
+      }

      while ((dpart = readdir(pdir)) != NULL) {
         /* Avoid 'lo' and '..' and '.' */
@@ -311,6 +308,7 @@ hud_get_num_disks(bool displayhelp)
         add_object_part(basename, dpart->d_name, DISKSTAT_WR);
      }
   }
+   closedir(dir);

   if (displayhelp) {
      list_for_each_entry(struct diskstat_info, dsi, &gdiskstat_list, list) {
@@ -322,6 +320,7 @@ hud_get_num_disks(bool displayhelp)
         puts(line);
      }
   }
+   pipe_mutex_unlock(gdiskstat_mutex);

   return gdiskstat_count;
 }
--- a/src/gallium/auxiliary/hud/hud_nic.c
+++ b/src/gallium/auxiliary/hud/hud_nic.c
@@ -35,6 +35,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -66,6 +67,7 @@ struct nic_info
 */
 static int gnic_count = 0;
 static struct list_head gnic_list;
+pipe_static_mutex(gnic_mutex);

 static struct nic_info *
 find_nic_by_name(const char *n, int mode)
@@ -234,14 +236,6 @@ query_nic_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct nic_info *nic = (struct nic_info *) p;
-   list_del(&nic->list);
-   FREE(nic);
-}
-
 /**
  * Create and initialize a new object for a specific network interface dev.
  * \param  pane  parent context.
@@ -284,11 +278,6 @@ hud_nic_graph_install(struct hud_pane *pane, const char *nic_name,
   gr->query_data = nic;
   gr->query_new_value = query_nic_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   hud_pane_set_max_value(pane, 100);
 }
@@ -342,16 +331,21 @@ hud_get_num_nics(bool displayhelp)
   char name[64];

   /* Return the number if network interfaces. */
-   if (gnic_count)
+   pipe_mutex_lock(gnic_mutex);
+   if (gnic_count) {
+      pipe_mutex_unlock(gnic_mutex);
      return gnic_count;
+   }

   /* Scan /sys/block, for every object type we support, create and
    * persist an object to represent its different statistics.
    */
   list_inithead(&gnic_list);
   DIR *dir = opendir("/sys/class/net/");
-   if (!dir)
+   if (!dir) {
+      pipe_mutex_unlock(gnic_mutex);
      return 0;
+   }

   while ((dp = readdir(dir)) != NULL) {

@@ -412,6 +406,7 @@ hud_get_num_nics(bool displayhelp)
      }

   }
+   closedir(dir);

   list_for_each_entry(struct nic_info, nic, &gnic_list, list) {
      char line[64];
@@ -424,6 +419,7 @@ hud_get_num_nics(bool displayhelp)

   }

+   pipe_mutex_unlock(gnic_mutex);
   return gnic_count;
 }

--- a/src/gallium/auxiliary/hud/hud_sensors_temp.c
+++ b/src/gallium/auxiliary/hud/hud_sensors_temp.c
@@ -32,6 +32,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -49,6 +50,7 @@
 */
 static int gsensors_temp_count = 0;
 static struct list_head gsensors_temp_list;
+pipe_static_mutex(gsensor_temp_mutex);

 struct sensors_temp_info
 {
@@ -189,17 +191,6 @@ query_sti_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct sensors_temp_info *sti = (struct sensors_temp_info *) p;
-   list_del(&sti->list);
-   if (sti->chip)
-      sensors_free_chip_name(sti->chip);
-   FREE(sti);
-   sensors_cleanup();
-}
-
 /**
  * Create and initialize a new object for a specific sensor interface dev.
  * \param  pane  parent context.
@@ -237,11 +228,6 @@ hud_sensors_temp_graph_install(struct hud_pane *pane, const char *dev_name,
   gr->query_data = sti;
   gr->query_new_value = query_sti_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   switch (sti->mode) {
   case SENSORS_TEMP_CURRENT:
@@ -338,12 +324,17 @@ int
 hud_get_num_sensors(bool displayhelp)
 {
   /* Return the number of sensors detected. */
-   if (gsensors_temp_count)
+   pipe_mutex_lock(gsensor_temp_mutex);
+   if (gsensors_temp_count) {
+      pipe_mutex_unlock(gsensor_temp_mutex);
      return gsensors_temp_count;
+   }

   int ret = sensors_init(NULL);
-   if (ret)
+   if (ret) {
+      pipe_mutex_unlock(gsensor_temp_mutex);
      return 0;
+   }

   list_inithead(&gsensors_temp_list);

@@ -377,6 +368,7 @@ hud_get_num_sensors(bool displayhelp)
      }
   }

+   pipe_mutex_unlock(gsensor_temp_mutex);
   return gsensors_temp_count;
 }

--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -672,17 +672,19 @@ iter_instruction(
      }
   }

-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_IF:
-   case TGSI_OPCODE_UIF:
-   case TGSI_OPCODE_ELSE:
-   case TGSI_OPCODE_BGNLOOP:
-   case TGSI_OPCODE_ENDLOOP:
-   case TGSI_OPCODE_CAL:
-   case TGSI_OPCODE_BGNSUB:
-      TXT( " :" );
-      UID( inst->Label.Label );
-      break;
+   if (inst->Instruction.Label) {
+      switch (inst->Instruction.Opcode) {
+      case TGSI_OPCODE_IF:
+      case TGSI_OPCODE_UIF:
+      case TGSI_OPCODE_ELSE:
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+      case TGSI_OPCODE_CAL:
+      case TGSI_OPCODE_BGNSUB:
+         TXT( " :" );
+         UID( inst->Label.Label );
+         break;
+      }
   }

   /* update indentation */
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -485,6 +485,7 @@ tgsi_opcode_infer_src_type( uint opcode )
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_UP2H:
   case TGSI_OPCODE_U2I64:
+   case TGSI_OPCODE_MEMBAR:
      return TGSI_TYPE_UNSIGNED;
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_I2F:
--- a/Show More
+++ b/Show More