docs: add release notes for 13.0.4

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Update version to 13.0.4
2017-02-01 10:10:38 +00:00 · 2017-02-01 10:04:14 +00:00 · 2017-01-25 13:43:44 +00:00 · 2017-01-25 13:43:22 +00:00 · 2017-01-24 01:13:33 +00:00 · 2017-01-24 01:13:33 +00:00
243 changed files with 5375 additions and 2267 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -82,11 +82,13 @@ LOCAL_CFLAGS += \
 	-D__STDC_LIMIT_MACROS
 endif

+ifneq ($(LOCAL_IS_HOST_MODULE),true)
 # add libdrm if there are hardware drivers
 ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES += libdrm
 endif
+endif

 LOCAL_CPPFLAGS += \
 	$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,7 +40,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
-	--disable-llvm-shared-libs \
+	--enable-llvm-shared-libs \
 	--with-egl-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
 	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
@@ -62,6 +62,7 @@ noinst_HEADERS = \
 	include/c99_math.h \
 	include/c11 \
 	include/D3D9 \
+	include/GL/wglext.h \
 	include/HaikuGL \
 	include/no_extern_c.h \
 	include/pci_ids
--- a/2
+++ b/2
@@ -1 +1 @@
-12.1.0-devel
+13.0.4
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,21 @@
+# Commit was picked with -x
+907ace57986733add2aebfa9dd7c83c67efed70e mapi: automake: set VISIBILITY_CFLAGS for shared glapi
+
+# Commit was reverted shortly after it landed in master
+a39ad185932eab4f25a0cb2b112c10d8700ef242 configure.ac: honour LLVM_LIBDIR when linking against LLVM
+
+# Commit fixes an earlier patch which is too invasive to be considered for stable.
+157971e450c34ec430c295ff922c2e597294aba3 i965/blit: Fix the src dimension sanity check in miptree_copy
+
+# Similar to the above - depends on the series which introduce intel_miptree_copy
+b18cd8ce2c07c2d1a666fbff1f0d92d17dd5b22c i965/miptree: Use intel_miptree_copy for maps
+
+# The commit is a backport of an identical anv one. The latter is not in stable,
+# and neither is this one, since both depend on functionality which is not in stable.
+65cbb993d33976d9ee24eff01ade8ed9013617ca radv: Call nir_lower_constant_initializers.
+
+# Commit causes a regression on i915, and Nicolai requested that we drop it altogether.
+963311b71fd9900351a4a9dd1cd5f5db391f7e1b mesa/main: fix version/extension checks in _mesa_ClampColor
+
+# Misnominated (only previous commit was meant to be for stable)
+36b9976e1f99e8070c67cb8a255793939db77d02 egl/wayland: Avoid race conditions when on non-main thread
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*13\.0.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/bin/get-typod-pick-list.sh
+++ b/bin/get-typod-pick-list.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+# Script for generating a list of candidates which have typos in the nomination line
+#
+# Usage examples:
+#
+# $ bin/get-typod-pick-list.sh
+# $ bin/get-typod-pick-list.sh > picklist
+# $ bin/get-typod-pick-list.sh | tee picklist
+
+# NB:
+# This script intentionally _never_ checks for a specific version tag.
+# Should we consider folding it into the original get-pick-list.sh?
+
+# Grep for commits with "cherry picked from commit" in the commit message.
+git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
+	grep "cherry picked from commit" |\
+	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
+
+# Grep for commits whose nomination line has a typo (CC: mesa-dev rather than mesa-stable).
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' HEAD..origin/master |\
+while read sha
+do
+	# Check to see whether the patch is on the ignore list.
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q ^$sha bin/.cherry-ignore ; then
+			continue
+		fi
+	fi
+
+	# Check to see if it has already been picked over.
+	if grep -q ^$sha already_picked ; then
+		continue
+	fi
+
+	git log -n1 --pretty=oneline $sha | cat
+done
+
+rm -f already_picked
--- a/configure.ac
+++ b/configure.ac
@@ -1377,6 +1377,9 @@ AC_ARG_ENABLE([driglx-direct],
 dnl
 dnl libGL configuration per driver
 dnl
+if test "x$enable_glx" != xno; then
+    PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED])
+fi
 case "x$enable_glx" in
 xxlib | xgallium-xlib)
    # Xlib-based GLX
@@ -1390,7 +1393,6 @@ xxlib | xgallium-xlib)
    ;;
 xdri)
    # DRI-based GLX
-    PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED])

    # find the DRI deps for libGL
    dri_modules="x11 xext xdamage xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED"
@@ -1667,13 +1669,6 @@ AC_ARG_WITH([vulkan-icddir],
    [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
 AC_SUBST([VULKAN_ICD_INSTALL_DIR])

-AC_ARG_ENABLE([vulkan-icd-full-driver-path],
-   [AS_HELP_STRING([--disable-vulkan-icd-full-driver-path],
-                   [create Vulkan ICD files with just a .so name and no path])],
-   [vulkan_icd_driver_path="$enableval"],
-   [vulkan_icd_driver_path="yes"])
-AM_CONDITIONAL(VULKAN_ICD_DRIVER_PATH, test "x$vulkan_icd_driver_path" = xyes)
-
 if test -n "$with_vulkan_drivers"; then
    VULKAN_DRIVERS=`IFS=', '; echo $with_vulkan_drivers`
    for driver in $VULKAN_DRIVERS; do
--- a/docs/relnotes/13.0.0.html
+++ b/docs/relnotes/13.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 13.0.0 Release Notes / TBD</h1>
+<h1>Mesa 13.0.0 Release Notes / November 1, 2016</h1>

 <p>
 Mesa 13.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+4a54d7cdc1a94a8dae05a75ccff48356406d51b0d6a64cbdc641c266e3e008eb  mesa-13.0.0.tar.gz
+94edb4ebff82066a68be79d9c2627f15995e1fe10f67ab3fc63deb842027d727  mesa-13.0.0.tar.xz
 </pre>


@@ -74,11 +75,236 @@ Note: some of the new features are only available with certain drivers.

 <h2>Bug fixes</h2>

-TBD.
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61907">Bug 61907</a> - Indirect rendering of multi-texture vertex arrays broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crahes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83036">Bug 83036</a> - [ILK]Piglit spec_ARB_copy_image_arb_copy_image-formats fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90513">Bug 90513</a> - Odd gray and red flicker in The Talos Principle on GK104</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94561">Bug 94561</a> - [llvmpipe] PIPE_CAP_VIDEO_MEMORY reports negative value on 32 bits (with 16GB ram)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94627">Bug 94627</a> - Game Risen on wine black grass</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94681">Bug 94681</a> - dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 takes 25 minutes to compile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95000">Bug 95000</a> - deqp: assert in dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.user_ptr_stride17_components2_quads1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95246">Bug 95246</a> - Segfault in glBindFramebuffer()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95419">Bug 95419</a> - [HSW][regression][bisect] RPG Maker game gives &quot;invalid floating point operation&quot; at startup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95462">Bug 95462</a> - [BXT,BSW] arb_gpu_shader_fp64 causes gpu hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95529">Bug 95529</a> - [regression, bisected] Image corruption in Chrome</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96235">Bug 96235</a> - st_nir.h:34: error: redefinition of typedef ‘nir_shader’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96285">Bug 96285</a> - Mesa build broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96299">Bug 96299</a> - [vulkan] 64 regressions due to mesa d5f2f32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96343">Bug 96343</a> - oom since st/mesa: implement PBO downloads for ReadPixels</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96346">Bug 96346</a> - [SNB,CTS] es2-cts.gtf.gl.atan regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96349">Bug 96349</a> - [CTS,SKL,BSW,BDW,KBL,BXT] es31-cts.arrays_of_arrays.interactionuniformbuffers3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96351">Bug 96351</a> - [CTS,SKL,KBL,BXT] es2-cts.gtf.gl2extensiontests.egl_image.egl_image</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regresion due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96425">Bug 96425</a> - [bisected] occasional dark render in The Talos Principle</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96484">Bug 96484</a> - [vulkan] deqp-vk.glsl.builtin.precision.sin / cos regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96504">Bug 96504</a> - [vulkancts] compute tests crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96516">Bug 96516</a> - [bisected: 482526] &quot;clover: Update OpenCL version string to match OpenGL&quot;: clover's build fails because of missing git_sha1.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96528">Bug 96528</a> - Location qualifier segfaults during shader compilation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96541">Bug 96541</a> - Tonga Unreal elemental bad rendering since radeonsi: Decompress DCC textures in a render feedback loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96565">Bug 96565</a> - Clive Barker's Jericho displays strange,vivid colors when motion blur enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96607">Bug 96607</a> - [bisected] texture misrender / flicker in The Talos Principle on SKL</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96617">Bug 96617</a> - gl_SecondaryFragDataEXT doesn't work for extended blend func</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96629">Bug 96629</a> - dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width &gt;= 1' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96639">Bug 96639</a> - st/mesa: transfer_map with too-high level with dEQP-GLES2.functional.texture.completeness.cube.extra_level</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96674">Bug 96674</a> - [SNB, ILK] spec.ext_image_dma_buf_import.ext_image_dma_buf_import-sample_nv1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96729">Bug 96729</a> - Wrong shader compilation error message</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96765">Bug 96765</a> - BindFragDataLocationIndexed on array fragment shader output.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96782">Bug 96782</a> - [regression bisected] R600 fp64 and glsl-4.00 piglit failures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96791">Bug 96791</a> - Cannot use image from swapchains for sampling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96825">Bug 96825</a> - anv_device.c:31:27: fatal error: anv_timestamp.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96878">Bug 96878</a> - [Bisected: cc2d0e6][HSW] &quot;GPU HANG&quot; msg after autologin to gnome-session</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96949">Bug 96949</a> - [regression] Piglit numSamples assertion failures with 9a23a177b90</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96950">Bug 96950</a> - Another regression from bc4e0c486: vbo: Use a bitmask to track the active arrays in vbo_exec*.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97019">Bug 97019</a> - [clover] build failure in llvm/codegen/native.cpp:129:52</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97032">Bug 97032</a> - [BDW,SKL] piglit.spec.arb_gpu_shader5.arb_gpu_shader5-interpolateatcentroid-flat</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97033">Bug 97033</a> - [BDW,SKL] piglit.spec.arb_gpu_shader_fp64.varying-packing.simple regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97083">Bug 97083</a> - [IVB,BYT] GPU hang on deqp-gles31.functional.separate.shader.random</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97140">Bug 97140</a> - dd_draw.c:949:11: error: implicit declaration of function 'fmemopen' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97267">Bug 97267</a> - [BDW] GL45-CTS.texture_cube_map_array.sampling asserts inside brw_fs.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97278">Bug 97278</a> - [vulkancts,HSW] all vulkancts tests assert on HSW</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97285">Bug 97285</a> - Darkness in Dota 2 after Patch &quot;Make Gallium's BlitFramebuffer follow the GL 4.4 sRGB rules&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97286">Bug 97286</a> - `make check` fails uniform-initializer-test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97305">Bug 97305</a> - Gallium: TBOs and images set the offset in elements, not bytes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97309">Bug 97309</a> - piglit.spec.glsl-1_30.compiler.switch-statement.switch-case-duplicated.vert regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97322">Bug 97322</a> - GenerateMipmap creates wrong mipmap for sRGB texture</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97413">Bug 97413</a> - BioShock Infinite crashes on startup with Mesa Git version, R7 370</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97448">Bug 97448</a> - [HSW] deqp-vk.api_.copy_and_blit.image_to_image_stencil regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97477">Bug 97477</a> - i915g: gl_FragCoord is always (0.0, max_y)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97513">Bug 97513</a> - clover reports wrong device pointer size</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97587">Bug 97587</a> - make check nir/tests/control_flow_tests regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97761">Bug 97761</a> - es2-cts.gtf.gl2extensiontests.egl_image_external.testsimpleunassociated crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97773">Bug 97773</a> - New Mesa master now results in warnings in glrender (and subsurfaces and simple-egl), black screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97790">Bug 97790</a> - Vulkan cts regressions due to 24be63066</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97804">Bug 97804</a> - Later precision statement isn't overriding earlier one</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97808">Bug 97808</a> - &quot;tgsi/scan: don't set interp flags for inputs only used by INTERP instructions&quot; causes glitches in wine with gallium nine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97894">Bug 97894</a> - Crash in u_transfer_unmap_vtbl when unmapping a buffer mapped in different context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97952">Bug 97952</a> - /usr/include/string.h:518:12: error: exception specification in declaration does not match previous declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97969">Bug 97969</a> - [radeonsi, bisected: fb827c0] Video decoding shows green artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97976">Bug 97976</a> - VCE regression BO to small for addr since winsys/amdgpu: enable buffer allocation from slabs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98005">Bug 98005</a> - VCE dual instance encoding inconsistent since st/va: enable dual instances encode by sync surface</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98128">Bug 98128</a> - nir/tests/control_flow_tests.cpp:79:73: error: ‘nir_loop_first_cf_node’ was not declared in this scope</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98131">Bug 98131</a> - Compiler should reject lowp/mediump qualifiers on atomic_uints</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98133">Bug 98133</a> - GetSynciv should raise an error if bufSize &lt; 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98135">Bug 98135</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.shader.transform_feedback_varyings wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98167">Bug 98167</a> - [vulkan, radv] missing libgcrypt and openssl devel results in linker error in libvulkan_common</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98172">Bug 98172</a> - Concurrent call to glClientWaitSync results in segfault in one of the waiters.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98244">Bug 98244</a> - dEQP: textureOffset(sampler2DArrayShadow, ...) should not exist.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98264">Bug 98264</a> - Build broken for i965 due to multiple deifnitions of intelFenceExtension</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98307">Bug 98307</a> - &quot;st/glsl_to_tgsi: explicitly track all input and output declaration&quot; broke flightgear colors on rs780</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98431">Bug 98431</a> - UnrealEngine v4 demos startup fails to blorp blit assert</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+Mesa no longer depends on libudev.

 </div>
 </body>
--- a/docs/relnotes/13.0.1.html
+++ b/docs/relnotes/13.0.1.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.1 Release Notes / November 14, 2016</h1>
+
+<p>
+Mesa 13.0.1 is a bug fix release which fixes bugs found since the 13.0.0 release.
+</p>
+<p>
+Mesa 13.0.1 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7cbb91dead05cde279ee95f86e8321c8e1c8fc9deb88f12e0f587672a10d88c5  mesa-13.0.1.tar.gz
+71962fb2bf77d33b0ad4a565b490dbbeaf4619099c6d9722f04a73187957a731  mesa-13.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97715">Bug 97715</a> - [ILK,G45,G965] piglit.spec.arb_separate_shader_objects.misc api error checks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98012">Bug 98012</a> - [IVB] Segfault when running Dolphin twice with Vulkan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98512">Bug 98512</a> - radeon r600 vdpau: Invalid command stream: texture bo too small</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
+  <li>glx/glvnd: Fix dispatch function names and indices</li>
+</ul>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>glx/windows: Add wgl.h to the sources list</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>android: avoid using libdrm with host modules</li>
+</ul>
+
+<p>Darren Salt (1):</p>
+<ul>
+  <li>radv/pipeline: Don't dereference NULL dynamic state pointers</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>radv: expose xlib platform extension</li>
+  <li>radv: fix dual source blending</li>
+  <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+  <li>radv: emit correct last export when Z/stencil export is enabled</li>
+  <li>ac/nir: add support for discard_if intrinsic (v2)</li>
+  <li>nir: add conditional discard optimisation (v4)</li>
+  <li>radv: enable conditional discard optimisation on radv.</li>
+  <li>radv: fix GetFenceStatus for signaled fences</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.0</li>
+  <li>amd/addrlib: limit fastcall/regparm to GCC i386</li>
+  <li>anv: use correct .specVersion for extensions</li>
+  <li>radv: use correct .specVersion for extensions</li>
+  <li>radv: Suffix the radeon_icd file with the host CPU</li>
+  <li>Update version to 13.0.1</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Use Newton-Raphson on the 1/W write to fix glmark2 terrain.</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().</li>
+</ul>
+
+<p>Fredrik Höglund (1):</p>
+<ul>
+  <li>radv: add support for anisotropic filtering on VI+</li>
+</ul>
+
+<p>Jason Ekstrand (21):</p>
+<ul>
+  <li>anv/device: Return DEVICE_LOST if execbuf2 fails</li>
+  <li>vulkan/wsi/x11: Better handle wsi_x11_connection_create failure</li>
+  <li>vulkan/wsi/x11: Clean up connections in finish_wsi</li>
+  <li>anv: Better handle return codes from anv_physical_device_init</li>
+  <li>intel/blorp: Use wm_prog_data instead of hand-rolling our own</li>
+  <li>intel/blorp: Pass a brw_stage_prog_data to upload_shader</li>
+  <li>anv/pipeline: Put actual pointers in anv_shader_bin</li>
+  <li>anv/pipeline: Properly cache prog_data::param</li>
+  <li>intel/blorp: Emit all the binding tables</li>
+  <li>anv/device: Add an execbuf wrapper</li>
+  <li>anv: Add a cmd_buffer_execbuf helper</li>
+  <li>anv: Don't presume to know what address is in a surface relocation</li>
+  <li>anv: Add a new bo_pool_init helper</li>
+  <li>anv/allocator: Simplify anv_scratch_pool</li>
+  <li>anv: Initialize anv_bo::offset to -1</li>
+  <li>anv/batch_chain: Improve write_reloc</li>
+  <li>anv: Add an anv_execbuf helper struct</li>
+  <li>anv/batch: Move last_ss_pool_bo_offset to the command buffer</li>
+  <li>anv: Move relocation handling from EndCommandBuffer to QueueSubmit</li>
+  <li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
+  <li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>glsl: Update deref types when resizing implicitly sized arrays.</li>
+  <li>mesa: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>anv: Do relocations in userspace before execbuf ioctl</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>egl: use util/macros.h</li>
+  <li>egl: make interop ABI visible again</li>
+  <li>glx: make interop ABI visible again</li>
+  <li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix BFE/BFI lowering for GLSL semantics</li>
+  <li>glsl: fix lowering of UBO references of named blocks</li>
+  <li>st/glsl_to_tgsi: fix dvec[34] loads from SSBO</li>
+  <li>st/mesa: fix the layer of VDPAU surface samplers</li>
+</ul>
+
+<p>Steven Toth (3):</p>
+<ul>
+  <li>gallium/hud: fix a problem where objects are free'd while in use.</li>
+  <li>gallium/hud: close a previously opened handle</li>
+  <li>gallium/hud: protect against and initialization race</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>mesa/glsl: delete previously linked shaders earlier when linking</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.2.html
+++ b/docs/relnotes/13.0.2.html
@@ -0,0 +1,189 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.2 Release Notes / November 28, 2016</h1>
+
+<p>
+Mesa 13.0.2 is a bug fix release which fixes bugs found since the 13.0.1 release.
+</p>
+<p>
+Mesa 13.0.2 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6014233a5db6032ab8de4881384871bbe029de684502707794ce7b3e6beec308  mesa-13.0.2.tar.gz
+a6ed622645f4ed61da418bf65adde5bcc4bb79023c36ba7d6b45b389da4416d5  mesa-13.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97321">Bug 97321</a> - Query INFO_LOG_LENGTH for empty info log should return 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97420">Bug 97420</a> - &quot;#version 0&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98632">Bug 98632</a> - Fix build on Hurd without PATH_MAX</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (3):</p>
+<ul>
+  <li>i965: Add some APL and KBL SKU strings</li>
+  <li>i965: Reorder PCI ID list to match release order</li>
+  <li>i965/glk: Add basic Geminilake support</li>
+</ul>
+
+<p>Dave Airlie (14):</p>
+<ul>
+  <li>radv: fix texturesamples to handle single sample case</li>
+  <li>wsi: fix VK_INCOMPLETE for vkGetSwapchainImagesKHR</li>
+  <li>radv: don't crash on null swapchain destroy.</li>
+  <li>ac/nir/llvm: fix channel in texture gather lowering code.</li>
+  <li>radv: make sure to flush input attachments correctly.</li>
+  <li>radv: fix image view creation for depth and stencil only</li>
+  <li>radv: spir-v allows texture size query with and without lod.</li>
+  <li>vulkan/wsi/x11: handle timeouts properly in next image acquire (v1.1)</li>
+  <li>vulkan/wsi: store present mode in swapchain base class</li>
+  <li>vulkan/wsi/x11: add support for IMMEDIATE present mode</li>
+  <li>radv: fix texel fetch offset with 2d arrays.</li>
+  <li>radv/si: fix optimal micro tile selection</li>
+  <li>radv/ac/llvm: shadow samplers only return one value.</li>
+  <li>radv: fix 3D clears with baseMiplevel</li>
+</ul>
+
+<p>Eduardo Lima Mitev (2):</p>
+<ul>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfaceFormatsKHR</li>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfacePresentModesKHR</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.1</li>
+  <li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
+  <li>anv: fix enumeration of properties</li>
+  <li>radv: honour the number of properties available</li>
+  <li>Update version to 13.0.2</li>
+</ul>
+
+<p>Eric Anholt (3):</p>
+<ul>
+  <li>vc4: Don't abort when a shader compile fails.</li>
+  <li>vc4: Clamp the shadow comparison value.</li>
+  <li>vc4: Fix register class handling of DDX/DDY arguments.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (2):</p>
+<ul>
+  <li>util/disk_cache: close a previously opened handle in disk_cache_put (v2)</li>
+  <li>anv: Fix unintentional integer overflow in anv_CreateDmaBufImageINTEL</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>anv/format: handle unsupported formats properly</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>glcpp: Handle '#version 0' and other invalid values</li>
+  <li>glsl: Parse 0 as a preprocessor INTCONSTANT</li>
+</ul>
+
+<p>Jason Ekstrand (15):</p>
+<ul>
+  <li>anv/gen8: Stall when needed in Cmd(Set|Reset)Event</li>
+  <li>anv/wsi: Set the fence to signaled in AcquireNextImageKHR</li>
+  <li>anv: Rework fences</li>
+  <li>vulkan/wsi/wayland: Include pthread.h</li>
+  <li>vulkan/wsi/wayland: Clean up some error handling paths</li>
+  <li>vulkan/wsi: Report the correct min/maxImageCount</li>
+  <li>i965/gs: Allow primitive id to be a system value</li>
+  <li>anv: Handle null in all destructors</li>
+  <li>anv/fence: Handle ANV_FENCE_CREATE_SIGNALED_BIT</li>
+  <li>nir/spirv: Fix handling of gl_PrimitiveId</li>
+  <li>anv/blorp: Ignore clears for attachments first used as resolve destinations</li>
+  <li>anv: Implement a depth stall restriction on gen7</li>
+  <li>anv/cmd_buffer: Handle running out of binding tables in compute shaders</li>
+  <li>anv/cmd_buffer: Emit a CS stall before setting a CS pipeline</li>
+  <li>vulkan/wsi/x11: Implement FIFO mode.</li>
+</ul>
+
+<p>Jordan Justen (2):</p>
+<ul>
+  <li>isl: Fix height calculation in isl_msaa_interleaved_scale_px_to_sa</li>
+  <li>i965/hsw: Set integer mode in sampling state for stencil texturing</li>
+</ul>
+
+<p>Kenneth Graunke (4):</p>
+<ul>
+  <li>intel: Set min_ds_entries on Broxton.</li>
+  <li>i965: Fix compute shader crash.</li>
+  <li>mesa: Drop PATH_MAX usage.</li>
+  <li>i965: Fix GS push inputs with enhanced layouts.</li>
+</ul>
+
+<p>Kevin Strasser (1):</p>
+<ul>
+  <li>vulkan/wsi: Add a thread-safe queue implementation</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix multi level clears with VK_REMAINING_MIP_LEVELS</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>gbm: request correct version of the DRI2_FENCE extension</li>
+</ul>
+
+<p>Nicolai Hähnle (2):</p>
+<ul>
+  <li>radeonsi: store group_size_variable in struct si_compute</li>
+  <li>glsl/lower_output_reads: fix geometry shader output handling with conditional emit</li>
+</ul>
+
+<p>Steinar H. Gunderson (1):</p>
+<ul>
+  <li>Fix races during _mesa_HashWalk().</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: fix empty program log length</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.3.html
+++ b/docs/relnotes/13.0.3.html
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.3 Release Notes / January 5, 2017</h1>
+
+<p>
+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release.
+</p>
+<p>
+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+55b07d056f9b855ba9d7c8b2ddc7d3b220a61c6ab1bdc73cbfc2f607721094c2  mesa-13.0.3.tar.gz
+d9aa8be5c176d00d0cd503cb2f64a5a403ea471ec819c022581414860d7ba40e  mesa-13.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+  <li>egl: Fix crashes in eglCreate*Surface()</li>
+</ul>
+
+<p>Dave Airlie (4):</p>
+<ul>
+  <li>anv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: fix another regression since shadow fixes.</li>
+  <li>radv: add missing license file to radv_meta_bufimage.</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.2</li>
+  <li>anv: don't double-close the same fd</li>
+  <li>anv: don't leak memory if anv_init_wsi() fails</li>
+  <li>radv: don't leak the fd if radv_physical_device_init() succeeds</li>
+  <li>Update version to 13.0.3</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: In a loop break/continue, jump if everyone has taken the path.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (3):</p>
+<ul>
+  <li>anv: Add missing error-checking to anv_block_pool_init (v2)</li>
+  <li>anv: Update the teardown in reverse order of the anv_CreateDevice</li>
+  <li>vulkan/wsi: Fix resource leak in success path of wsi_queue_init()</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>compiler/glsl: fix precision problem of tanh</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>mesa: only verify that enabled arrays have backing buffers</li>
+</ul>
+
+<p>Jason Ekstrand (8):</p>
+<ul>
+  <li>anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty</li>
+  <li>anv/image: Rename hiz_surface to aux_surface</li>
+  <li>anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation</li>
+  <li>genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode</li>
+  <li>anv/device: Return the right error for failed maps</li>
+  <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+  <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+  <li>spirv: Use a simpler and more correct implementation of tanh()</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Allocate at least some URB space even when max_vertices = 0.</li>
+</ul>
+
+<p>Marek Olšák (17):</p>
+<ul>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: consolidate max-work-group-size computation</li>
+  <li>radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips</li>
+  <li>radeonsi: apply a TC L1 write corruption workaround for SI</li>
+  <li>radeonsi: apply a tessellation bug workaround for SI</li>
+  <li>radeonsi: add a tess+GS hang workaround for VI dGPUs</li>
+  <li>radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well</li>
+  <li>cso: don't release sampler states that are bound</li>
+  <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+  <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+  <li>radeonsi: allow specifying simm16 of emit_waitcnt at call sites</li>
+  <li>radeonsi: wait for outstanding memory instructions in TCS barriers</li>
+  <li>tgsi: fix the src type of TGSI_OPCODE_MEMBAR</li>
+  <li>radeonsi: wait for outstanding LDS instructions in memory barriers if needed</li>
+  <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+  <li>i965/fs: Add unit tests for copy propagation pass.</li>
+  <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix isolines tess factor writes to control ring</li>
+  <li>radeonsi: update all GSVS ring descriptors for new buffer allocations</li>
+  <li>radeonsi: do not kill GS with memory writes</li>
+  <li>radeonsi: fix an off-by-one error in the bounds check for max_vertices</li>
+</ul>
+
+<p>Rhys Kidd (1):</p>
+<ul>
+  <li>glsl: Add pthread libs to cache_test</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>mesa: fix active subroutine uniforms properly</li>
+  <li>Revert "nir: Turn imov/fmov of undef into undef."</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.4.html
+++ b/docs/relnotes/13.0.4.html
@@ -0,0 +1,254 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.4 Release Notes / February 1, 2017</h1>
+
+<p>
+Mesa 13.0.4 is a bug fix release which fixes bugs found since the 13.0.3 release.
+</p>
+<p>
+Mesa 13.0.4 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92634">Bug 92634</a> - gallium's vl_mpeg12_decoder does not work with st/va</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94512">Bug 94512</a> - X segfaults with glx-tls enabled in a x32 environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94900">Bug 94900</a> - HD6950 GPU lockup loop with various steam games (octodad[always], saints row 4[always], dead island[always], grid autosport[sometimes])</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with &quot;Fatal error: Cannot set display mode.&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98914">Bug 98914</a> - mesa-vdpau-drivers: breaks vdpau for mpeg2video</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99085">Bug 99085</a> - [EGL] dEQP-EGL.functional.sharing.gles2.multithread intermittent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99097">Bug 99097</a> - [vulkancts] dEQP-VK.image.store regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99100">Bug 99100</a> - [SKL,BDW,BSW,KBL] dEQP-VK.glsl.return.return_in_dynamic_loop_dynamic_vertex regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99144">Bug 99144</a> - Incorrect rendering using glDrawArraysInstancedBaseInstance and first != 0 on Skylake</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99154">Bug 99154</a> - Link time error when using multiple builtin functions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99158">Bug 99158</a> - vdpau segfaults and gpu locks with kodi on R9285</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99185">Bug 99185</a> - dEQP-EGL.functional.image.modify.tex_rgb5_a1_tex_subimage_rgba8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99188">Bug 99188</a> - dEQP-EGL.functional.create_context_ext.robust_gl_30.rgb565_no_depth_no_stencil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99210">Bug 99210</a> - ES3-CTS.functional.texture.mipmap.cube.generate.rgba5551_*</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] &quot;Assertion `bkref' failed&quot; reproducible with glmark2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Rodriguez (2):</p>
+<ul>
+  <li>vulkan/wsi: clarify the severity of lack of DRI3 v2</li>
+  <li>radv: fix include order for installed headers v2</li>
+</ul>
+
+<p>Arda Coskunses (2):</p>
+<ul>
+  <li>vulkan/wsi/x11: don't crash on null visual</li>
+  <li>vulkan/wsi/x11: don't crash on null wsi x11 connection</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Support loader interface version 3.</li>
+</ul>
+
+<p>Chad Versace (10):</p>
+<ul>
+  <li>egl: Check config's surface types in eglCreate*Surface()</li>
+  <li>dri: Add __DRI_IMAGE_FORMAT_ARGB1555</li>
+  <li>mesa/texformat: Handle GL_RGBA + GL_UNSIGNED_SHORT_5_5_5_1</li>
+  <li>egl: Emit correct error when robust context creation fails</li>
+  <li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
+  <li>mesa/shaderobj: Fix races on refcounts</li>
+  <li>meta: Disable dithering during glGenerateMipmap</li>
+  <li>vulkan: Add new cast macros for VkIcd types</li>
+  <li>vulkan: Update vk_icd.h to interface version 3</li>
+  <li>anv: Support loader interface version 3 (patch v2)</li>
+</ul>
+
+<p>Christian König (1):</p>
+<ul>
+  <li>vl/zscan: fix "Fix trivial sign compare warnings"</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>glx: Add missing glproto dependency for gallium-xlib glx</li>
+</ul>
+
+<p>Damien Grassart (1):</p>
+<ul>
+  <li>anv: return count of queue families written</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>radv: flush smem for uniform buffer bit.</li>
+</ul>
+
+<p>Emil Velikov (10):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.3</li>
+  <li>cherry-ignore: add couple of intel_miptree_copy related patches</li>
+  <li>cherry-ignore: add "radv: Call nir_lower_constant_initializers."</li>
+  <li>get-typod-pick-list.sh: add new script</li>
+  <li>cherry-ignore: add "_mesa_ClampColor extension/version fix"</li>
+  <li>cherry-ignore: add wayland race condition fix</li>
+  <li>egl/wayland: use the destroy_window_callback for swrast</li>
+  <li>automake: use shared llvm libs for make distcheck</li>
+  <li>get-pick-list.sh: Require explicit "13.0" for nominating stable patches</li>
+  <li>Update version to 13.0.4</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>anv: Fix uniform and storage buffer offset alignment limits.</li>
+</ul>
+
+<p>Fredrik Höglund (2):</p>
+<ul>
+  <li>radv: fix dual source blending</li>
+  <li>dri3: Fix MakeCurrent without a default framebuffer</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>mapi: update the asm code to support x32</li>
+</ul>
+
+<p>Heiko Przybyl (1):</p>
+<ul>
+  <li>r600/sb: Fix loop optimization related hangs on eg</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nouveau: take extra push space into account for pushbuf_space calls</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>i965/generator/tex: Handle an immediate sampler with an indirect texture</li>
+  <li>anv/formats: Use the real format for B4G4R4A4_UNORM_PACK16 on gen8</li>
+  <li>nir/search: Only allow matching SSA values</li>
+  <li>isl: Mark A4B4G4R4_UNORM as supported on gen8</li>
+</ul>
+
+<p>Jonas Ådahl (1):</p>
+<ul>
+  <li>egl/wayland: Cleanup private display connection when init fails</li>
+</ul>
+
+<p>Kenneth Graunke (7):</p>
+<ul>
+  <li>i965: Don't bail on vertex element processing if we need draw params.</li>
+  <li>i965: Fix last slot calculations</li>
+  <li>i965: Fix texturing in the vec4 TCS and GS backends.</li>
+  <li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
+  <li>i965: Make BLORP disable the NP Z PMA stall fix.</li>
+  <li>glsl: Use ir_var_temporary when generating inline functions.</li>
+  <li>i965: Properly flush in hsw_pause_transform_feedback().</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>vdpau: call texture_get_handle while the mutex is being held</li>
+  <li>va: call texture_get_handle while the mutex is being held</li>
+  <li>radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+</li>
+  <li>radeonsi: don't forget to add HTILE to the buffer list for texturing</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
+</ul>
+
+<p>Nanley Chery (3):</p>
+<ul>
+  <li>anv/cmd_buffer: Fix arrayed depth/stencil attachments</li>
+  <li>anv/cmd_buffer: Fix programmed HiZ qpitch</li>
+  <li>anv/image: Disable HiZ for depth buffer arrays</li>
+</ul>
+
+<p>Nayan Deshmukh (1):</p>
+<ul>
+  <li>st/va: delay calling begin_frame until we have all parameters</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: some fence cleanup</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>gallium/hud: add missing break in hud_cpufreq_graph_install()</li>
+</ul>
+
+<p>Timothy Arceri (3):</p>
+<ul>
+  <li>nir: Turn imov/fmov of undef into undef</li>
+  <li>glsl: fix opt_minmax redundancy checks against baserange</li>
+  <li>util: fix list_is_singular()</li>
+</ul>
+
+<p>Zachary Michaels (1):</p>
+<ul>
+  <li>radeonsi: Always leave poly_offset in a valid state</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1121,6 +1121,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FORMAT_XRGB2101010  0x1009
 #define __DRI_IMAGE_FORMAT_ARGB2101010  0x100a
 #define __DRI_IMAGE_FORMAT_SARGB8       0x100b
+#define __DRI_IMAGE_FORMAT_ARGB1555     0x100c

 #define __DRI_IMAGE_USE_SHARE		0x0001
 #define __DRI_IMAGE_USE_SCANOUT		0x0002
@@ -1148,6 +1149,7 @@ struct __DRIdri2ExtensionRec {

 #define __DRI_IMAGE_FOURCC_R8		0x20203852
 #define __DRI_IMAGE_FOURCC_GR88		0x38385247
+#define __DRI_IMAGE_FOURCC_ARGB1555	0x35315241
 #define __DRI_IMAGE_FOURCC_RGB565	0x36314752
 #define __DRI_IMAGE_FOURCC_ARGB8888	0x34325241
 #define __DRI_IMAGE_FOURCC_XRGB8888	0x34325258
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,6 +109,10 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
+CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
+CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
+CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
@@ -134,6 +138,11 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
+CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics 505 (Broxton)")
+CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)")
 CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
@@ -144,22 +153,15 @@ CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kabylake GT2)")
 CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kabylake GT2)")
 CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
 CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
-CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
-CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x3184, glk,     "Intel(R) HD Graphics (Geminilake)")
+CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
--- a/include/vulkan/vk_icd.h
+++ b/include/vulkan/vk_icd.h
@@ -1,28 +1,56 @@
+//
+// File: vk_icd.h
+//
+/*
+ * Copyright (c) 2015-2016 The Khronos Group Inc.
+ * Copyright (c) 2015-2016 Valve Corporation
+ * Copyright (c) 2015-2016 LunarG, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
 #ifndef VKICD_H
 #define VKICD_H

-#include "vk_platform.h"
+#include "vulkan.h"

+/*
+ * Loader-ICD version negotiation API
+ */
+#define CURRENT_LOADER_ICD_INTERFACE_VERSION 3
+#define MIN_SUPPORTED_LOADER_ICD_INTERFACE_VERSION 0
+typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderICDInterfaceVersion)(uint32_t *pVersion);
 /*
 * The ICD must reserve space for a pointer for the loader's dispatch
 * table, at the start of <each object>.
 * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
 */

-#define ICD_LOADER_MAGIC   0x01CDC0DE
+#define ICD_LOADER_MAGIC 0x01CDC0DE

-typedef union _VK_LOADER_DATA {
-  uintptr_t loaderMagic;
-  void *loaderData;
+typedef union {
+    uintptr_t loaderMagic;
+    void *loaderData;
 } VK_LOADER_DATA;

-static inline void set_loader_magic_value(void* pNewObject) {
-    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline void set_loader_magic_value(void *pNewObject) {
+    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    loader_info->loaderMagic = ICD_LOADER_MAGIC;
 }

-static inline bool valid_loader_magic_value(void* pNewObject) {
-    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline bool valid_loader_magic_value(void *pNewObject) {
+    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
 }

@@ -30,56 +58,74 @@ static inline bool valid_loader_magic_value(void* pNewObject) {
 * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
 * contains the platform-specific connection and surface information.
 */
-typedef enum _VkIcdWsiPlatform {
+typedef enum {
    VK_ICD_WSI_PLATFORM_MIR,
    VK_ICD_WSI_PLATFORM_WAYLAND,
    VK_ICD_WSI_PLATFORM_WIN32,
    VK_ICD_WSI_PLATFORM_XCB,
    VK_ICD_WSI_PLATFORM_XLIB,
+    VK_ICD_WSI_PLATFORM_DISPLAY
 } VkIcdWsiPlatform;

-typedef struct _VkIcdSurfaceBase {
-    VkIcdWsiPlatform   platform;
+typedef struct {
+    VkIcdWsiPlatform platform;
 } VkIcdSurfaceBase;

 #ifdef VK_USE_PLATFORM_MIR_KHR
-typedef struct _VkIcdSurfaceMir {
-    VkIcdSurfaceBase   base;
-    MirConnection*     connection;
-    MirSurface*        mirSurface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    MirConnection *connection;
+    MirSurface *mirSurface;
 } VkIcdSurfaceMir;
 #endif // VK_USE_PLATFORM_MIR_KHR

 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
-typedef struct _VkIcdSurfaceWayland {
-    VkIcdSurfaceBase   base;
-    struct wl_display* display;
-    struct wl_surface* surface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    struct wl_display *display;
+    struct wl_surface *surface;
 } VkIcdSurfaceWayland;
 #endif // VK_USE_PLATFORM_WAYLAND_KHR

 #ifdef VK_USE_PLATFORM_WIN32_KHR
-typedef struct _VkIcdSurfaceWin32 {
-    VkIcdSurfaceBase   base;
-    HINSTANCE          hinstance;
-    HWND               hwnd;
+typedef struct {
+    VkIcdSurfaceBase base;
+    HINSTANCE hinstance;
+    HWND hwnd;
 } VkIcdSurfaceWin32;
 #endif // VK_USE_PLATFORM_WIN32_KHR

 #ifdef VK_USE_PLATFORM_XCB_KHR
-typedef struct _VkIcdSurfaceXcb {
-    VkIcdSurfaceBase   base;
-    xcb_connection_t*  connection;
-    xcb_window_t       window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    xcb_connection_t *connection;
+    xcb_window_t window;
 } VkIcdSurfaceXcb;
 #endif // VK_USE_PLATFORM_XCB_KHR

 #ifdef VK_USE_PLATFORM_XLIB_KHR
-typedef struct _VkIcdSurfaceXlib {
-    VkIcdSurfaceBase   base;
-    Display*           dpy;
-    Window             window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    Display *dpy;
+    Window window;
 } VkIcdSurfaceXlib;
 #endif // VK_USE_PLATFORM_XLIB_KHR

+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+typedef struct {
+    ANativeWindow* window;
+} VkIcdSurfaceAndroid;
+#endif //VK_USE_PLATFORM_ANDROID_KHR
+
+typedef struct {
+    VkIcdSurfaceBase base;
+    VkDisplayModeKHR displayMode;
+    uint32_t planeIndex;
+    uint32_t planeStackIndex;
+    VkSurfaceTransformFlagBitsKHR transform;
+    float globalAlpha;
+    VkDisplayPlaneAlphaFlagBitsKHR alphaMode;
+    VkExtent2D imageExtent;
+} VkIcdSurfaceDisplay;
+
 #endif // VKICD_H
--- a/src/amd/addrlib/addrtypes.h
+++ b/src/amd/addrlib/addrtypes.h
@@ -88,7 +88,11 @@ typedef int            INT;

 #ifndef ADDR_FASTCALL
    #if defined(__GNUC__)
-        #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #if defined(__i386__)
+            #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #else
+            #define ADDR_FASTCALL
+        #endif
    #else
        #define ADDR_FASTCALL __fastcall
    #endif
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1683,7 +1683,7 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,

 		for (c = 0; c < 2; c++) {
 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
-								ctx->i32zero, "");
+								LLVMConstInt(ctx->i32, c, false), "");
 			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
 			half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
 			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
@@ -2609,6 +2609,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
 			    ctx->voidt, NULL, 0, 0);
 }

+static void emit_discard_if(struct nir_to_llvm_context *ctx, /* lowers nir_intrinsic_discard_if to an llvm.AMDGPU.kill call */
+			    nir_intrinsic_instr *instr)
+{
+	LLVMValueRef cond;
+	ctx->shader_info->fs.can_discard = true; /* record that this fragment shader may discard */
+
+	cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, /* cond = (src[0] != 0) */
+			     get_src(ctx, instr->src[0]),
+			     ctx->i32zero, "");
+
+	cond = LLVMBuildSelect(ctx->builder, cond, /* kill operand: -1.0 when cond holds, 0.0 otherwise */
+			       LLVMConstReal(ctx->f32, -1.0f),
+			       ctx->f32zero, "");
+	emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kill", /* NOTE(review): presumably kills the fragment when the operand is negative - confirm */
+			    LLVMVoidTypeInContext(ctx->context),
+			    &cond, 1, 0);
+}
+
 static LLVMValueRef
 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
 {
@@ -2921,6 +2939,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
 				    LLVMVoidTypeInContext(ctx->context),
 				    NULL, 0, 0);
 		break;
+	case nir_intrinsic_discard_if:
+		emit_discard_if(ctx, instr);
+		break;
 	case nir_intrinsic_memory_barrier:
 		emit_waitcnt(ctx);
 		break;
@@ -3278,17 +3299,25 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 	}

 	if (instr->op == nir_texop_texture_samples) {
-		LLVMValueRef res, samples;
+		LLVMValueRef res, samples, is_msaa;
 		res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
 		samples = LLVMBuildExtractElement(ctx->builder, res,
 						  LLVMConstInt(ctx->i32, 3, false), "");
+		is_msaa = LLVMBuildLShr(ctx->builder, samples,
+					LLVMConstInt(ctx->i32, 28, false), "");
+		is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
+				       LLVMConstInt(ctx->i32, 0xe, false), "");
+		is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
+					LLVMConstInt(ctx->i32, 0xe, false), "");
+
 		samples = LLVMBuildLShr(ctx->builder, samples,
 					LLVMConstInt(ctx->i32, 16, false), "");
 		samples = LLVMBuildAnd(ctx->builder, samples,
 				       LLVMConstInt(ctx->i32, 0xf, false), "");
 		samples = LLVMBuildShl(ctx->builder, ctx->i32one,
 				       samples, "");
-
+		samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
+					  ctx->i32one, "");
 		result = samples;
 		goto write_result;
 	}
@@ -3387,7 +3416,10 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		address[count++] = sample_index;
 	} else if(instr->op == nir_texop_txs) {
 		count = 0;
-		address[count++] = lod;
+		if (lod)
+			address[count++] = lod;
+		else
+			address[count++] = ctx->i32zero;
 	}

 	for (chan = 0; chan < count; chan++) {
@@ -3412,7 +3444,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		result = build_tex_intrinsic(ctx, instr, &txf_info);

 		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
-		result = LLVMBuildICmp(ctx->builder, LLVMIntEQ, result, ctx->i32zero, "");
+		result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
 		goto write_result;
 	}

@@ -3485,12 +3517,13 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 	if (offsets && instr->op == nir_texop_txf) {
 		nir_const_value *const_offset =
 			nir_src_as_const_value(instr->src[const_src].src);
-
+		int num_offsets = instr->src[const_src].src.ssa->num_components;
 		assert(const_offset);
-		if (instr->coord_components > 2)
+		num_offsets = MIN2(num_offsets, instr->coord_components);
+		if (num_offsets > 2)
 			address[2] = LLVMBuildAdd(ctx->builder,
 						  address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
-		if (instr->coord_components > 1)
+		if (num_offsets > 1)
 			address[1] = LLVMBuildAdd(ctx->builder,
 						  address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
 		address[0] = LLVMBuildAdd(ctx->builder,
@@ -3512,6 +3545,8 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)

 	if (instr->op == nir_texop_query_levels)
 		result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
+	else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
+		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
 	else if (instr->op == nir_texop_txs &&
 		 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
 		 instr->is_array) {
@@ -3520,7 +3555,8 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
 		result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
-	}
+	} else if (instr->dest.ssa.num_components != 4)
+		result = trim_vector(ctx, result, instr->dest.ssa.num_components);

 write_result:
 	if (result) {
@@ -3910,7 +3946,7 @@ static void
 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
 			  struct nir_variable *variable)
 {
-	int idx = variable->data.location;
+	int idx = variable->data.location + variable->data.index;
 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);

 	variable->data.driver_location = idx * 4;
@@ -3940,7 +3976,7 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
 		                       si_build_alloca_undef(ctx, ctx->f32, "");
 		}
 	}
-	ctx->output_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
+	ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
 }

 static void
@@ -4351,12 +4387,10 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx,

 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
 		LLVMValueRef values[4];
-		bool last;
+
 		if (!(ctx->output_mask & (1ull << i)))
 			continue;

-		last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
-
 		if (i == FRAG_RESULT_DEPTH) {
 			ctx->shader_info->fs.writes_z = true;
 			depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
@@ -4366,10 +4400,14 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx,
 			stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
 							      ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
 		} else {
+			bool last = false;
 			for (unsigned j = 0; j < 4; j++)
 				values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
 									ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));

+			if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
+				last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
+
 			si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
 			index++;
 		}
--- a/src/amd/vulkan/.gitignore
+++ b/src/amd/vulkan/.gitignore
@@ -4,3 +4,4 @@
 /radv_timestamp.h
 /dev_icd.json
 /vk_format_table.c
+/radeon_icd.*.json
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -32,9 +32,6 @@ lib_LTLIBRARIES = libvulkan_radeon.la
 # The gallium includes are for the util/u_math.h include from main/macros.h

 AM_CPPFLAGS = \
-	$(AMDGPU_CFLAGS) \
-	$(VALGRIND_CFLAGS) \
-	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_builddir)/src \
 	-I$(top_srcdir)/src \
@@ -48,7 +45,10 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gallium/include
+	-I$(top_srcdir)/src/gallium/include \
+	$(AMDGPU_CFLAGS) \
+	$(VALGRIND_CFLAGS) \
+	$(DEFINES)

 AM_CFLAGS = \
 	$(VISIBILITY_CFLAGS) \
@@ -131,11 +131,11 @@ vk_format_table.c: vk_format_table.py \
 	$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h
+CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
 EXTRA_DIST = \
 	$(top_srcdir)/include/vulkan/vk_icd.h \
 	dev_icd.json.in \
-	radeon_icd.json \
+	radeon_icd.json.in \
 	radv_entrypoints_gen.py \
 	vk_format_layout.csv \
 	vk_format_parse.py \
@@ -155,7 +155,7 @@ libvulkan_radeon_la_LDFLAGS = \


 icdconfdir = @VULKAN_ICD_INSTALL_DIR@
-icdconf_DATA = radeon_icd.json
+icdconf_DATA = radeon_icd.@host_cpu@.json
 # The following is used for development purposes, by setting VK_ICD_FILENAMES.
 noinst_DATA = dev_icd.json

@@ -164,4 +164,9 @@ dev_icd.json : dev_icd.json.in
 		-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
 		< $(srcdir)/dev_icd.json.in > $@

+radeon_icd.@host_cpu@.json : radeon_icd.json.in
+	$(AM_V_GEN) $(SED) \
+		-e "s#@install_libdir@#${libdir}#" \
+		< $(srcdir)/radeon_icd.json.in > $@
+
 include $(top_srcdir)/install-lib-links.mk
--- a/src/amd/vulkan/dev_icd.json.in
+++ b/src/amd/vulkan/dev_icd.json.in
@@ -2,6 +2,6 @@
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "@build_libdir@/libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
+        "api_version": "1.0.3"
    }
 }
--- a/src/amd/vulkan/radeon_icd.json
+++ b/src/amd/vulkan/radeon_icd.json
@@ -1,7 +0,0 @@
-{
-    "file_format_version": "1.0.0",
-    "ICD": {
-        "library_path": "libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
-    }
-}
--- a/src/amd/vulkan/radeon_icd.json.in
+++ b/src/amd/vulkan/radeon_icd.json.in
@@ -0,0 +1,7 @@
+{
+    "file_format_version": "1.0.0",
+    "ICD": {
+        "library_path": "@install_libdir@/libvulkan_radeon.so",
+        "api_version": "1.0.3"
+    }
+}
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2163,9 +2163,6 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffe
 			radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
 	} else if (radv_layout_has_cmask(image, src_layout) &&
 		   !radv_layout_has_cmask(image, dst_layout)) {
-
-		if (!cmd_buffer->device->allow_fast_clears)
-			return;
 		radv_fast_clear_flush_image_inplace(cmd_buffer, image);
 	}
 }
@@ -2286,14 +2283,17 @@ void radv_CmdPipelineBarrier(
 		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
 		case VK_ACCESS_INDEX_READ_BIT:
 		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-		case VK_ACCESS_UNIFORM_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
 			break;
+		case VK_ACCESS_UNIFORM_READ_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+			break;
 		case VK_ACCESS_SHADER_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
 			break;
 		case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
 		case VK_ACCESS_TRANSFER_READ_BIT:
+		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
 			flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
 		default:
 			break;
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -44,12 +44,6 @@
 #include "util/debug.h"
 struct radv_dispatch_table dtable;

-struct radv_fence {
-	struct radeon_winsys_fence *fence;
-	bool submitted;
-	bool signalled;
-};
-
 static VkResult
 radv_physical_device_init(struct radv_physical_device *device,
 			  struct radv_instance *instance,
@@ -97,6 +91,7 @@ radv_physical_device_init(struct radv_physical_device *device,

 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
 	device->name = device->rad_info.name;
+	close(fd);
 	return VK_SUCCESS;

 fail:
@@ -119,13 +114,19 @@ static const VkExtensionProperties global_extensions[] = {
 #ifdef VK_USE_PLATFORM_XCB_KHR
 	{
 		.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
-		.specVersion = 5,
+		.specVersion = 6,
+	},
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+	{
+		.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+		.specVersion = 6,
 	},
 #endif
 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
 	{
 		.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
-		.specVersion = 4,
+		.specVersion = 5,
 	},
 #endif
 };
@@ -133,7 +134,7 @@ static const VkExtensionProperties global_extensions[] = {
 static const VkExtensionProperties device_extensions[] = {
 	{
 		.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
-		.specVersion = 67,
+		.specVersion = 68,
 	},
 };

@@ -424,7 +425,7 @@ void radv_GetPhysicalDeviceProperties(
 		.maxGeometryTotalOutputComponents         = 1024,
 		.maxFragmentInputComponents               = 128,
 		.maxFragmentOutputAttachments             = 8,
-		.maxFragmentDualSrcAttachments            = 2,
+		.maxFragmentDualSrcAttachments            = 1,
 		.maxFragmentCombinedOutputResources       = 8,
 		.maxComputeSharedMemorySize               = 32768,
 		.maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
@@ -659,17 +660,15 @@ VkResult radv_EnumerateInstanceExtensionProperties(
 	uint32_t*                                   pPropertyCount,
 	VkExtensionProperties*                      pProperties)
 {
-	unsigned i;
 	if (pProperties == NULL) {
 		*pPropertyCount = ARRAY_SIZE(global_extensions);
 		return VK_SUCCESS;
 	}

-	for (i = 0; i < *pPropertyCount; i++)
-		memcpy(&pProperties[i], &global_extensions[i], sizeof(VkExtensionProperties));
+	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
+	typed_memcpy(pProperties, global_extensions, *pPropertyCount);

-	*pPropertyCount = i;
-	if (i < ARRAY_SIZE(global_extensions))
+	if (*pPropertyCount < ARRAY_SIZE(global_extensions))
 		return VK_INCOMPLETE;

 	return VK_SUCCESS;
@@ -681,19 +680,17 @@ VkResult radv_EnumerateDeviceExtensionProperties(
 	uint32_t*                                   pPropertyCount,
 	VkExtensionProperties*                      pProperties)
 {
-	unsigned i;
-
 	if (pProperties == NULL) {
 		*pPropertyCount = ARRAY_SIZE(device_extensions);
 		return VK_SUCCESS;
 	}

-	for (i = 0; i < *pPropertyCount; i++)
-		memcpy(&pProperties[i], &device_extensions[i], sizeof(VkExtensionProperties));
+	*pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
+	typed_memcpy(pProperties, device_extensions, *pPropertyCount);

-	*pPropertyCount = i;
-	if (i < ARRAY_SIZE(device_extensions))
+	if (*pPropertyCount < ARRAY_SIZE(device_extensions))
 		return VK_INCOMPLETE;
+
 	return VK_SUCCESS;
 }

@@ -1172,6 +1169,8 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_fence, fence, _fence);

+	if (fence->signalled)
+		return VK_SUCCESS;
 	if (!fence->submitted)
 		return VK_NOT_READY;

@@ -1734,26 +1733,50 @@ radv_tex_bordercolor(VkBorderColor bcolor)
 	return 0;
 }

+static unsigned
+radv_tex_aniso_filter(unsigned filter) /* buckets an anisotropy value into 0..4 using thresholds 2, 4, 8, 16 */
+{
+	if (filter < 2)
+		return 0; /* filter is 0 or 1 */
+	if (filter < 4)
+		return 1; /* filter in [2, 3] */
+	if (filter < 8)
+		return 2; /* filter in [4, 7] */
+	if (filter < 16)
+		return 3; /* filter in [8, 15] */
+	return 4; /* filter is 16 or larger */
+}
+
 static void
 radv_init_sampler(struct radv_device *device,
 		  struct radv_sampler *sampler,
 		  const VkSamplerCreateInfo *pCreateInfo)
 {
-	uint32_t max_aniso = 0;
-	uint32_t max_aniso_ratio = 0;//TODO
+	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
+					(uint32_t) pCreateInfo->maxAnisotropy : 0;
+	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
 	bool is_vi;
 	is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI);

+	if (!is_vi && max_aniso > 0) {
+		radv_finishme("Anisotropic filtering must be disabled manually "
+		              "by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n");
+		max_aniso = max_aniso_ratio = 0;
+	}
+
 	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
 			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
 			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
 			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
 			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
 			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
+			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
+			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
 			     S_008F30_DISABLE_CUBE_WRAP(0) |
 			     S_008F30_COMPAT_MODE(is_vi));
 	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
-			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)));
+			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
+			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
 	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
 			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
 			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
@@ -1800,3 +1823,48 @@ void radv_DestroySampler(
 		return;
 	vk_free2(&device->alloc, pAllocator, sampler);
 }
+
+
+/* vk_icd.h does not declare this function, so we declare it here to
+ * suppress Wmissing-prototypes.
+ */
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
+
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
+{
+	/* For the full details on loader interface versioning, see
+	* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+	* What follows is a condensed summary, to help you navigate the large and
+	* confusing official doc.
+	*
+	*   - Loader interface v0 is incompatible with later versions. We don't
+	*     support it.
+	*
+	*   - In loader interface v1:
+	*       - The first ICD entrypoint called by the loader is
+	*         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+	*         entrypoint.
+	*       - The ICD must statically expose no other Vulkan symbol unless it is
+	*         linked with -Bsymbolic.
+	*       - Each dispatchable Vulkan handle created by the ICD must be
+	*         a pointer to a struct whose first member is VK_LOADER_DATA. The
+	*         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+	*       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+	*         vkDestroySurfaceKHR(). The ICD must be capable of working with
+	*         such loader-managed surfaces.
+	*
+	*    - Loader interface v2 differs from v1 in:
+	*       - The first ICD entrypoint called by the loader is
+	*         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+	*         statically expose this entrypoint.
+	*
+	*    - Loader interface v3 differs from v2 in:
+	*        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+	*          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
+	*          because the loader no longer does so.
+	*/
+	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
+	return VK_SUCCESS;
+}
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -154,6 +154,7 @@ uint32_t radv_translate_tex_dataformat(VkFormat format,
 		case VK_FORMAT_D16_UNORM:
 			return V_008F14_IMG_DATA_FORMAT_16;
 		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_X8_D24_UNORM_PACK32:
 			return V_008F14_IMG_DATA_FORMAT_8_24;
 		case VK_FORMAT_S8_UINT:
 			return V_008F14_IMG_DATA_FORMAT_8;
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -267,17 +267,7 @@ si_make_texture_descriptor(struct radv_device *device,

 	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
-		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
-
-		switch (vk_format) {
-		case VK_FORMAT_X8_D24_UNORM_PACK32:
-		case VK_FORMAT_D24_UNORM_S8_UINT:
-		case VK_FORMAT_D32_SFLOAT_S8_UINT:
-			vk_format_compose_swizzles(mapping, swizzle_yyyy, swizzle);
-			break;
-		default:
-			vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
-		}
+		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
 	} else {
 		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
 	}
@@ -775,8 +765,13 @@ radv_image_view_init(struct radv_image_view *iview,
 	iview->vk_format = pCreateInfo->format;
 	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

-	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
 		is_stencil = true;
+		iview->vk_format = vk_format_stencil_only(iview->vk_format);
+	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+		iview->vk_format = vk_format_depth_only(iview->vk_format);
+	}
+
 	iview->extent = (VkExtent3D) {
 		.width  = radv_minify(image->extent.width , range->baseMipLevel),
 		.height = radv_minify(image->extent.height, range->baseMipLevel),
@@ -794,7 +789,7 @@ radv_image_view_init(struct radv_image_view *iview,

 	si_make_texture_descriptor(device, image, false,
 				   iview->type,
-				   pCreateInfo->format,
+				   iview->vk_format,
 				   &pCreateInfo->components,
 				   0, radv_get_levelCount(image, range) - 1,
 				   range->baseArrayLayer,
@@ -836,29 +831,29 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
 		switch (micro_tile_mode) {
 		case 0: /* displayable */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                            image->surface.tiling_index[0] = 10;
                            break;
-			case 16:
+			case 2:
                            image->surface.tiling_index[0] = 11;
                            break;
-			default: /* 32, 64 */
+			default: /* 4, 8 */
                            image->surface.tiling_index[0] = 12;
                            break;
 			}
 			break;
 		case 1: /* thin */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                                image->surface.tiling_index[0] = 14;
                                break;
-			case 16:
+			case 2:
                                image->surface.tiling_index[0] = 15;
                                break;
-			case 32:
+			case 4:
                                image->surface.tiling_index[0] = 16;
                                break;
-			default: /* 64, 128 */
+			default: /* 8, 16 */
                                image->surface.tiling_index[0] = 17;
                                break;
 			}
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -1,6 +1,33 @@
+/*
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 #include "radv_meta.h"
 #include "nir/nir_builder.h"

+/*
+ * Compute shader implementation of image->buffer copy.
+ */
+
 static nir_shader *
 build_nir_itob_compute_shader(struct radv_device *dev)
 {
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -998,7 +998,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		const VkImageSubresourceRange *range = &ranges[r];
 		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
 			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
-				radv_minify(image->extent.depth, l) :
+				radv_minify(image->extent.depth, range->baseMipLevel + l) :
 				radv_get_layerCount(image, range);
 			for (uint32_t s = 0; s < layer_count; ++s) {
 				struct radv_image_view iview;
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader)
                NIR_PASS(progress, shader, nir_opt_algebraic);
                NIR_PASS(progress, shader, nir_opt_constant_folding);
                NIR_PASS(progress, shader, nir_opt_undef);
+                NIR_PASS(progress, shader, nir_opt_conditional_discard);
        } while (progress);
 }

@@ -642,7 +643,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 					const VkGraphicsPipelineCreateInfo *pCreateInfo,
 					uint32_t blend_enable,
 					uint32_t blend_need_alpha,
-					bool single_cb_enable)
+					bool single_cb_enable,
+					bool blend_mrt0_is_dual_src)
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
@@ -664,6 +666,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,

 	blend->cb_shader_mask = si_get_cb_shader_mask(col_format);

+	if (blend_mrt0_is_dual_src)
+		col_format |= (col_format & 0xf) << 4;
 	if (!col_format)
 		col_format |= V_028714_SPI_SHADER_32_R;
 	blend->spi_shader_col_format = col_format;
@@ -715,8 +719,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	unsigned mode = V_028808_CB_NORMAL;
 	uint32_t blend_enable = 0, blend_need_alpha = 0;
+	bool blend_mrt0_is_dual_src = false;
 	int i;
 	bool single_cb_enable = false;
+
+	if (!vkblend)
+		return;
+
 	if (extra && extra->custom_blend_mode) {
 		single_cb_enable = true;
 		mode = extra->custom_blend_mode;
@@ -755,7 +764,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		}

 		if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
-			radv_finishme("dual source blending");
+			if (i == 0)
+				blend_mrt0_is_dual_src = true;
+
 		if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
 			srcRGB = VK_BLEND_FACTOR_ONE;
 			dstRGB = VK_BLEND_FACTOR_ONE;
@@ -797,7 +808,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);

 	radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo,
-						blend_enable, blend_need_alpha, single_cb_enable);
+						blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src);
 }

 static uint32_t si_translate_stencil_op(enum VkStencilOp op)
@@ -1069,18 +1080,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,

 	struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;

-	dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
-	if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
-		typed_memcpy(dynamic->viewport.viewports,
-			     pCreateInfo->pViewportState->pViewports,
-			     pCreateInfo->pViewportState->viewportCount);
-	}
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pViewportState is [...] NULL if the pipeline
+	 *    has rasterization disabled.
+	 */
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
+		assert(pCreateInfo->pViewportState);

-	dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
-	if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
-		typed_memcpy(dynamic->scissor.scissors,
-			     pCreateInfo->pViewportState->pScissors,
-			     pCreateInfo->pViewportState->scissorCount);
+		dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+		if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+			typed_memcpy(dynamic->viewport.viewports,
+				     pCreateInfo->pViewportState->pViewports,
+				     pCreateInfo->pViewportState->viewportCount);
+		}
+
+		dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+		if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+			typed_memcpy(dynamic->scissor.scissors,
+				     pCreateInfo->pViewportState->pScissors,
+				     pCreateInfo->pViewportState->scissorCount);
+		}
 	}

 	if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
@@ -1098,7 +1118,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 			pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
 	}

-	if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pColorBlendState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is
+	 *    created against does not use any color attachments.
+	 */
+	bool uses_color_att = false;
+	for (unsigned i = 0; i < subpass->color_count; ++i) {
+		if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
+			uses_color_att = true;
+			break;
+		}
+	}
+
+	if (uses_color_att && states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
 		assert(pCreateInfo->pColorBlendState);
 		typed_memcpy(dynamic->blend_constants,
 			     pCreateInfo->pColorBlendState->blendConstants, 4);
@@ -1110,14 +1144,17 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	 * no need to override the depthstencil defaults in
 	 * radv_pipeline::dynamic_state when there is no depthstencil attachment.
 	 *
-	 * From the Vulkan spec (20 Oct 2015, git-aa308cb):
+	 * Section 9.2 of the Vulkan 1.0.15 spec says:
 	 *
-	 *    pDepthStencilState [...] may only be NULL if renderPass and subpass
-	 *    specify a subpass that has no depth/stencil attachment.
+	 *    pDepthStencilState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is created
+	 *    against does not use a depth/stencil attachment.
 	 */
-	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+		assert(pCreateInfo->pDepthStencilState);
+
 		if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->depth_bounds.min =
 				pCreateInfo->pDepthStencilState->minDepthBounds;
 			dynamic->depth_bounds.max =
@@ -1125,7 +1162,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_compare_mask.front =
 				pCreateInfo->pDepthStencilState->front.compareMask;
 			dynamic->stencil_compare_mask.back =
@@ -1133,7 +1169,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_write_mask.front =
 				pCreateInfo->pDepthStencilState->front.writeMask;
 			dynamic->stencil_write_mask.back =
@@ -1141,7 +1176,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_reference.front =
 				pCreateInfo->pDepthStencilState->front.reference;
 			dynamic->stencil_reference.back =
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1206,6 +1206,13 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
 			   struct radv_image *image, uint32_t value);
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 			 struct radv_image *image, uint32_t value);
+
+struct radv_fence { /* driver-side state behind a VkFence handle */
+	struct radeon_winsys_fence *fence; /* underlying winsys fence object */
+	bool submitted; /* while false (and not signalled), radv_GetFenceStatus reports VK_NOT_READY */
+	bool signalled; /* when true, radv_GetFenceStatus reports VK_SUCCESS immediately */
+};
+
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
 								\
 	static inline struct __radv_type *			\
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -75,7 +75,7 @@ void radv_DestroySurfaceKHR(
 	const VkAllocationCallbacks*                 pAllocator)
 {
 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);

 	vk_free2(&instance->alloc, pAllocator, surface);
 }
@@ -87,7 +87,7 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
 	VkBool32*                                   pSupported)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_support(surface, &device->wsi_device,
@@ -101,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
 	VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_capabilities(surface, pSurfaceCapabilities);
@@ -114,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
 	VkSurfaceFormatKHR*                         pSurfaceFormats)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount,
@@ -128,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
 	VkPresentModeKHR*                           pPresentModes)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_present_modes(surface, pPresentModeCount,
@@ -249,7 +249,7 @@ VkResult radv_CreateSwapchainKHR(
 	VkSwapchainKHR*                              pSwapchain)
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
 	struct wsi_interface *iface =
 		device->instance->physicalDevice.wsi_device.wsi[surface->platform];
 	struct wsi_swapchain *swapchain;
@@ -288,6 +288,9 @@ void radv_DestroySwapchainKHR(
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
 	const VkAllocationCallbacks *alloc;

+	if (!_swapchain)
+		return;
+
 	if (pAllocator)
 		alloc = pAllocator;
 	else
@@ -318,13 +321,21 @@ VkResult radv_AcquireNextImageKHR(
 	VkSwapchainKHR                               _swapchain,
 	uint64_t                                     timeout,
 	VkSemaphore                                  semaphore,
-	VkFence                                      fence,
+	VkFence                                      _fence,
 	uint32_t*                                    pImageIndex)
 {
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+	RADV_FROM_HANDLE(radv_fence, fence, _fence);

-	return swapchain->acquire_next_image(swapchain, timeout, semaphore,
-					     pImageIndex);
+	VkResult result = swapchain->acquire_next_image(swapchain, timeout, semaphore,
+	                                                pImageIndex);
+
+	if (fence && result == VK_SUCCESS) {
+		fence->submitted = true;
+		fence->signalled = true;
+	}
+
+	return result;
 }

 VkResult radv_QueuePresentKHR(
--- a/src/compiler/Makefile.glsl.am
+++ b/src/compiler/Makefile.glsl.am
@@ -62,8 +62,11 @@ glsl_tests_blob_test_LDADD =				\

 glsl_tests_cache_test_SOURCES =				\
 	glsl/tests/cache_test.c
+glsl_tests_cache_test_CFLAGS =				\
+	$(PTHREAD_CFLAGS)
 glsl_tests_cache_test_LDADD =				\
-	glsl/libglsl.la
+	glsl/libglsl.la					\
+	$(PTHREAD_LIBS)

 glsl_tests_general_ir_test_SOURCES =			\
 	glsl/tests/builtin_variable_test.cpp		\
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -227,6 +227,7 @@ NIR_FILES = \
 	nir/nir_metadata.c \
 	nir/nir_move_vec_src_uses_to_dest.c \
 	nir/nir_normalize_cubemap_coords.c \
+	nir/nir_opt_conditional_discard.c \
 	nir/nir_opt_constant_folding.c \
 	nir/nir_opt_copy_propagate.c \
 	nir/nir_opt_cse.c \
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4330,6 +4330,8 @@ handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
   if (var->data.patch)
      return;

+   var->data.tess_varying_implicit_sized_array = var->type->is_unsized_array();
+
   validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
                                          &state->tcs_output_size,
                                          "tessellation control shader output");
@@ -4366,6 +4368,7 @@ handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
   if (var->type->is_unsized_array()) {
      var->type = glsl_type::get_array_instance(var->type->fields.array,
            state->Const.MaxPatchVertices);
+      var->data.tess_varying_implicit_sized_array = true;
   } else if (var->type->length != state->Const.MaxPatchVertices) {
      _mesa_glsl_error(&loc, state,
                       "per-vertex tessellation shader input arrays must be "
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3563,9 +3563,17 @@ builtin_builder::_tanh(const glsl_type *type)
   ir_variable *x = in_var(type, "x");
   MAKE_SIG(type, v130, 1, x);

+   /* Clamp x to [-10, +10] to avoid precision problems.
+    * When x > 10, e^(-x) is so small relative to e^x that it gets flushed to
+    * zero in the computation e^x + e^(-x). The same happens in the other
+    * direction when x < -10.
+    */
+   ir_variable *t = body.make_temp(type, "tmp");
+   body.emit(assign(t, min2(max2(x, imm(-10.0f)), imm(10.0f))));
+
   /* (e^x - e^(-x)) / (e^x + e^(-x)) */
-   body.emit(ret(div(sub(exp(x), exp(neg(x))),
-                     add(exp(x), exp(neg(x))))));
+   body.emit(ret(div(sub(exp(t), exp(neg(t))),
+                     add(exp(t), exp(neg(t))))));

   return sig;
 }
--- a/src/compiler/glsl/cache.c
+++ b/src/compiler/glsl/cache.c
@@ -612,19 +612,18 @@ cache_put(struct program_cache *cache,

   p_atomic_add(cache->size, size);

+ done:
+   if (fd_final != -1)
+      close(fd_final);
   /* This close finally releases the flock, (now that the final dile
    * has been renamed into place and the size has been added).
    */
-   close(fd);
-   fd = -1;
-
- done:
+   if (fd != -1)
+      close(fd);
   if (filename_tmp)
      ralloc_free(filename_tmp);
   if (filename)
      ralloc_free(filename);
-   if (fd != -1)
-      close(fd);
 }

 void *
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -176,7 +176,7 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value);
         * (such as the <HASH> and <DEFINE> start conditions in the lexer). */
 %token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS
 %token PASTE
-%type <ival> INTEGER operator SPACE integer_constant
+%type <ival> INTEGER operator SPACE integer_constant version_constant
 %type <expression_value> expression
 %type <str> IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA
 %type <string_list> identifier_list
@@ -424,14 +424,14 @@ control_line_success:
 |	HASH_TOKEN ENDIF {
 		_glcpp_parser_skip_stack_pop (parser, & @1);
 	} NEWLINE
-|	HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
-		if (parser->version != 0) {
+|	HASH_TOKEN VERSION_TOKEN version_constant NEWLINE {
+		if (parser->version_set) {
 			glcpp_error(& @1, parser, "#version must appear on the first line");
 		}
 		_glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
 	}
-|	HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
-		if (parser->version != 0) {
+|	HASH_TOKEN VERSION_TOKEN version_constant IDENTIFIER NEWLINE {
+		if (parser->version_set) {
 			glcpp_error(& @1, parser, "#version must appear on the first line");
 		}
 		_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
@@ -470,6 +470,17 @@ integer_constant:
 		$$ = $1;
 	}

+version_constant:
+	INTEGER_STRING {
+	   /* Both octal and hexadecimal constants begin with 0. */
+	   if ($1[0] == '0' && $1[1] != '\0') {
+		glcpp_error(&@1, parser, "invalid #version \"%s\" (not a decimal constant)", $1);
+		$$ = 0;
+	   } else {
+		$$ = strtoll($1, NULL, 10);
+	   }
+	}
+
 expression:
 	integer_constant {
 		$$.value = $1;
@@ -1376,6 +1387,7 @@ glcpp_parser_create(glcpp_extension_iterator extensions, void *state, gl_api api
   parser->state = state;
   parser->api = api;
   parser->version = 0;
+   parser->version_set = false;

   parser->has_new_line_number = 0;
   parser->new_line_number = 1;
@@ -2318,10 +2330,11 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
                                         const char *es_identifier,
                                         bool explicitly_set)
 {
-   if (parser->version != 0)
+   if (parser->version_set)
      return;

   parser->version = version;
+   parser->version_set = true;

   add_builtin_define (parser, "__VERSION__", version);

--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -207,6 +207,15 @@ struct glcpp_parser {
 	void *state;
 	gl_api api;
 	unsigned version;
+
+	/**
+	 * Has the #version been set?
+	 *
+	 * A separate flag is used because any possible sentinel value in
+	 * \c ::version could also be set by a #version line.
+	 */
+	bool version_set;
+
 	bool has_new_line_number;
 	int new_line_number;
 	bool has_new_source_number;
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -253,6 +253,10 @@ HASH		^{SPC}#{SPC}
 				    yylval->n = strtol(yytext, NULL, 10);
 				    return INTCONSTANT;
 				}
+<PP>0				{
+				    yylval->n = 0;
+				    return INTCONSTANT;
+				}
 <PP>\n				{ BEGIN 0; yylineno++; yycolumn = 0; return EOL; }
 <PP>.				{ return yytext[0]; }

--- a/src/compiler/glsl/glsl_symbol_table.cpp
+++ b/src/compiler/glsl/glsl_symbol_table.cpp
@@ -126,7 +126,7 @@ void glsl_symbol_table::pop_scope()

 bool glsl_symbol_table::name_declared_this_scope(const char *name)
 {
-   return _mesa_symbol_table_symbol_scope(table, -1, name) == 0;
+   return _mesa_symbol_table_symbol_scope(table, name) == 0;
 }

 bool glsl_symbol_table::add_variable(ir_variable *v)
@@ -152,7 +152,7 @@ bool glsl_symbol_table::add_variable(ir_variable *v)
 	 symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v);
 	 if (existing != NULL)
 	    entry->f = existing->f;
-	 int added = _mesa_symbol_table_add_symbol(table, -1, v->name, entry);
+	 int added = _mesa_symbol_table_add_symbol(table, v->name, entry);
 	 assert(added == 0);
 	 (void)added;
 	 return true;
@@ -162,13 +162,13 @@ bool glsl_symbol_table::add_variable(ir_variable *v)

   /* 1.20+ rules: */
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v);
-   return _mesa_symbol_table_add_symbol(table, -1, v->name, entry) == 0;
+   return _mesa_symbol_table_add_symbol(table, v->name, entry) == 0;
 }

 bool glsl_symbol_table::add_type(const char *name, const glsl_type *t)
 {
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(t);
-   return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+   return _mesa_symbol_table_add_symbol(table, name, entry) == 0;
 }

 bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i,
@@ -180,7 +180,7 @@ bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i,
      symbol_table_entry *entry =
         new(mem_ctx) symbol_table_entry(i, mode);
      bool add_interface_symbol_result =
-         _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+         _mesa_symbol_table_add_symbol(table, name, entry) == 0;
      assert(add_interface_symbol_result);
      return add_interface_symbol_result;
   } else {
@@ -199,7 +199,7 @@ bool glsl_symbol_table::add_function(ir_function *f)
      }
   }
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f);
-   return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0;
+   return _mesa_symbol_table_add_symbol(table, f->name, entry) == 0;
 }

 bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name,
@@ -213,13 +213,16 @@ bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name,
   symbol_table_entry *entry =
      new(mem_ctx) symbol_table_entry(default_specifier);

-   return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0;
+   if (!get_entry(name))
+      return _mesa_symbol_table_add_symbol(table, name, entry) == 0;
+
+   return _mesa_symbol_table_replace_symbol(table, name, entry) == 0;
 }

 void glsl_symbol_table::add_global_function(ir_function *f)
 {
   symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f);
-   int added = _mesa_symbol_table_add_global_symbol(table, -1, f->name, entry);
+   int added = _mesa_symbol_table_add_global_symbol(table, f->name, entry);
   assert(added == 0);
   (void)added;
 }
@@ -261,7 +264,7 @@ int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
 symbol_table_entry *glsl_symbol_table::get_entry(const char *name)
 {
   return (symbol_table_entry *)
-      _mesa_symbol_table_find_symbol(table, -1, name);
+      _mesa_symbol_table_find_symbol(table, name);
 }

 void
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -832,6 +832,12 @@ public:

      unsigned implicit_sized_array:1;

+      /**
+       * Is this a non-patch TCS output / TES input array that was implicitly
+       * sized to gl_MaxPatchVertices?
+       */
+      unsigned tess_varying_implicit_sized_array:1;
+
      /**
       * Whether this is a fragment shader output implicitly initialized with
       * the previous contents of the specified render target at the
--- a/src/compiler/glsl/ir_print_visitor.cpp
+++ b/src/compiler/glsl/ir_print_visitor.cpp
@@ -130,14 +130,14 @@ ir_print_visitor::unique_name(ir_variable *var)

   /* If there's no conflict, just use the original name */
   const char* name = NULL;
-   if (_mesa_symbol_table_find_symbol(this->symbols, -1, var->name) == NULL) {
+   if (_mesa_symbol_table_find_symbol(this->symbols, var->name) == NULL) {
      name = var->name;
   } else {
      static unsigned i = 1;
      name = ralloc_asprintf(this->mem_ctx, "%s@%u", var->name, ++i);
   }
   _mesa_hash_table_insert(this->printable_names, var, (void *) name);
-   _mesa_symbol_table_add_symbol(this->symbols, -1, name, var);
+   _mesa_symbol_table_add_symbol(this->symbols, name, var);
   return name;
 }

--- a/src/compiler/glsl/link_uniforms.cpp
+++ b/src/compiler/glsl/link_uniforms.cpp
@@ -633,6 +633,8 @@ private:
         uniform->opaque[shader_type].index = this->next_subroutine;
         uniform->opaque[shader_type].active = true;

+         prog->_LinkedShaders[shader_type]->NumSubroutineUniforms++;
+
         /* Increment the subroutine index by 1 for non-arrays and by the
          * number of array elements for arrays.
          */
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -181,7 +181,43 @@ private:
 };


-class array_resize_visitor : public ir_hierarchical_visitor {
+/**
+ * A visitor helper that provides methods for updating the types of
+ * ir_dereferences.  Classes that update variable types (say, updating
+ * array sizes) will want to use this so that dereference types stay in sync.
+ */
+class deref_type_updater : public ir_hierarchical_visitor {
+public:
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      ir->type = ir->var->type;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
+   {
+      const glsl_type *const vt = ir->array->type;
+      if (vt->is_array())
+         ir->type = vt->fields.array;
+      return visit_continue;
+   }
+
+   virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
+   {
+      for (unsigned i = 0; i < ir->record->type->length; i++) {
+         const struct glsl_struct_field *field =
+            &ir->record->type->fields.structure[i];
+         if (strcmp(field->name, ir->field) == 0) {
+            ir->type = field->type;
+            break;
+         }
+      }
+      return visit_continue;
+   }
+};
+
+
+class array_resize_visitor : public deref_type_updater {
 public:
   unsigned num_vertices;
   gl_shader_program *prog;
@@ -240,24 +276,6 @@ public:

      return visit_continue;
   }
-
-   /* Dereferences of input variables need to be updated so that their type
-    * matches the newly assigned type of the variable they are accessing. */
-   virtual ir_visitor_status visit(ir_dereference_variable *ir)
-   {
-      ir->type = ir->var->type;
-      return visit_continue;
-   }
-
-   /* Dereferences of 2D input arrays need to be updated so that their type
-    * matches the newly assigned type of the array they are accessing. */
-   virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
-   {
-      const glsl_type *const vt = ir->array->type;
-      if (vt->is_array())
-         ir->type = vt->fields.array;
-      return visit_continue;
-   }
 };

 /**
@@ -1353,7 +1371,7 @@ move_non_declarations(exec_list *instructions, exec_node *last,
 * it inside that function leads to compiler warnings with some versions of
 * gcc.
 */
-class array_sizing_visitor : public ir_hierarchical_visitor {
+class array_sizing_visitor : public deref_type_updater {
 public:
   array_sizing_visitor()
      : mem_ctx(ralloc_context(NULL)),
@@ -2273,6 +2291,8 @@ update_array_sizes(struct gl_shader_program *prog)
         if (prog->_LinkedShaders[i] == NULL)
            continue;

+      bool types_were_updated = false;
+
      foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
         ir_variable *const var = node->as_variable();

@@ -2328,11 +2348,15 @@ update_array_sizes(struct gl_shader_program *prog)

            var->type = glsl_type::get_array_instance(var->type->fields.array,
                                                      size + 1);
-            /* FINISHME: We should update the types of array
-             * dereferences of this variable now.
-             */
+            types_were_updated = true;
         }
      }
+
+      /* Update the types of dereferences in case we changed any. */
+      if (types_were_updated) {
+         deref_type_updater v;
+         v.run(prog->_LinkedShaders[i]->ir);
+      }
   }
 }

@@ -3094,7 +3118,6 @@ link_calculate_subroutine_compat(struct gl_shader_program *prog)
         if (!uni)
            continue;

-         sh->NumSubroutineUniforms++;
         count = 0;
         if (sh->NumSubroutineFunctions == 0) {
            linker_error(prog, "subroutine uniform %s defined but no valid functions found\n", uni->type->name);
@@ -3574,6 +3597,7 @@ static gl_shader_variable *
 create_shader_variable(struct gl_shader_program *shProg,
                       const ir_variable *in,
                       const char *name, const glsl_type *type,
+                       const glsl_type *interface_type,
                       bool use_implicit_location, int location,
                       const glsl_type *outermost_struct_type)
 {
@@ -3631,7 +3655,7 @@ create_shader_variable(struct gl_shader_program *shProg,

   out->type = type;
   out->outermost_struct_type = outermost_struct_type;
-   out->interface_type = in->get_interface_type();
+   out->interface_type = interface_type;
   out->component = in->data.location_frac;
   out->index = in->data.index;
   out->patch = in->data.patch;
@@ -3643,8 +3667,21 @@ create_shader_variable(struct gl_shader_program *shProg,
   return out;
 }

+static const glsl_type *
+resize_to_max_patch_vertices(const struct gl_context *ctx,
+                             const glsl_type *type)
+{
+   if (!type)
+      return NULL;
+
+   return glsl_type::get_array_instance(type->fields.array,
+                                        ctx->Const.MaxPatchVertices);
+}
+
 static bool
-add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
+add_shader_variable(const struct gl_context *ctx,
+                    struct gl_shader_program *shProg,
+                    struct set *resource_set,
                    unsigned stage_mask,
                    GLenum programInterface, ir_variable *var,
                    const char *name, const glsl_type *type,
@@ -3673,7 +3710,7 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
      for (unsigned i = 0; i < type->length; i++) {
         const struct glsl_struct_field *field = &type->fields.structure[i];
         char *field_name = ralloc_asprintf(shProg, "%s.%s", name, field->name);
-         if (!add_shader_variable(shProg, resource_set,
+         if (!add_shader_variable(ctx, shProg, resource_set,
                                  stage_mask, programInterface,
                                  var, field_name, field->type,
                                  use_implicit_location, field_location,
@@ -3687,6 +3724,29 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
   }

   default: {
+      const glsl_type *interface_type = var->get_interface_type();
+
+      /* Unsized (non-patch) TCS output/TES input arrays are implicitly
+       * sized to gl_MaxPatchVertices.  Internally, we shrink them to a
+       * smaller size.
+       *
+       * This can cause trouble with SSO programs.  Since the TCS declares
+       * the number of output vertices, we can always shrink TCS output
+       * arrays.  However, the TES might not be linked with a TCS, in
+       * which case it won't know the size of the patch.  In other words,
+       * the TCS and TES may disagree on the (smaller) array sizes.  This
+       * can result in the resource names differing across stages, causing
+       * SSO validation failures and other cascading issues.
+       *
+       * Expanding the array size to the full gl_MaxPatchVertices fixes
+       * these issues.  It's also what program interface queries expect,
+       * as that is the official size of the array.
+       */
+      if (var->data.tess_varying_implicit_sized_array) {
+         type = resize_to_max_patch_vertices(ctx, type);
+         interface_type = resize_to_max_patch_vertices(ctx, interface_type);
+      }
+
      /* Issue #16 of the ARB_program_interface_query spec says:
       *
       * "* If a variable is a member of an interface block without an
@@ -3699,8 +3759,7 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
       */
      const char *prefixed_name = (var->data.from_named_ifc_block &&
                                   !is_gl_identifier(var->name))
-         ? ralloc_asprintf(shProg, "%s.%s", var->get_interface_type()->name,
-                           name)
+         ? ralloc_asprintf(shProg, "%s.%s", interface_type->name, name)
         : name;

      /* The ARB_program_interface_query spec says:
@@ -3711,6 +3770,7 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
       */
      gl_shader_variable *sha_v =
         create_shader_variable(shProg, var, prefixed_name, type,
+                                interface_type,
                                use_implicit_location, location,
                                outermost_struct_type);
      if (!sha_v)
@@ -3723,7 +3783,8 @@ add_shader_variable(struct gl_shader_program *shProg, struct set *resource_set,
 }

 static bool
-add_interface_variables(struct gl_shader_program *shProg,
+add_interface_variables(const struct gl_context *ctx,
+                        struct gl_shader_program *shProg,
                        struct set *resource_set,
                        unsigned stage, GLenum programInterface)
 {
@@ -3774,7 +3835,7 @@ add_interface_variables(struct gl_shader_program *shProg,
         (stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
         (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out);

-      if (!add_shader_variable(shProg, resource_set,
+      if (!add_shader_variable(ctx, shProg, resource_set,
                               1 << stage, programInterface,
                               var, var->name, var->type, vs_input_or_fs_output,
                               var->data.location - loc_bias))
@@ -3784,7 +3845,9 @@ add_interface_variables(struct gl_shader_program *shProg,
 }

 static bool
-add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
+add_packed_varyings(const struct gl_context *ctx,
+                    struct gl_shader_program *shProg,
+                    struct set *resource_set,
                    int stage, GLenum type)
 {
   struct gl_linked_shader *sh = shProg->_LinkedShaders[stage];
@@ -3810,7 +3873,7 @@ add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
         if (type == iface) {
            const int stage_mask =
               build_stageref(shProg, var->name, var->data.mode);
-            if (!add_shader_variable(shProg, resource_set,
+            if (!add_shader_variable(ctx, shProg, resource_set,
                                     stage_mask,
                                     iface, var, var->name, var->type, false,
                                     var->data.location - VARYING_SLOT_VAR0))
@@ -3822,7 +3885,9 @@ add_packed_varyings(struct gl_shader_program *shProg, struct set *resource_set,
 }

 static bool
-add_fragdata_arrays(struct gl_shader_program *shProg, struct set *resource_set)
+add_fragdata_arrays(const struct gl_context *ctx,
+                    struct gl_shader_program *shProg,
+                    struct set *resource_set)
 {
   struct gl_linked_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT];

@@ -3834,7 +3899,7 @@ add_fragdata_arrays(struct gl_shader_program *shProg, struct set *resource_set)
      if (var) {
         assert(var->data.mode == ir_var_shader_out);

-         if (!add_shader_variable(shProg, resource_set,
+         if (!add_shader_variable(ctx, shProg, resource_set,
                                  1 << MESA_SHADER_FRAGMENT,
                                  GL_PROGRAM_OUTPUT, var, var->name, var->type,
                                  true, var->data.location - FRAG_RESULT_DATA0))
@@ -4093,24 +4158,24 @@ build_program_resource_list(struct gl_context *ctx,

   /* Program interface needs to expose varyings in case of SSO. */
   if (shProg->SeparateShader) {
-      if (!add_packed_varyings(shProg, resource_set,
+      if (!add_packed_varyings(ctx, shProg, resource_set,
                               input_stage, GL_PROGRAM_INPUT))
         return;

-      if (!add_packed_varyings(shProg, resource_set,
+      if (!add_packed_varyings(ctx, shProg, resource_set,
                               output_stage, GL_PROGRAM_OUTPUT))
         return;
   }

-   if (!add_fragdata_arrays(shProg, resource_set))
+   if (!add_fragdata_arrays(ctx, shProg, resource_set))
      return;

   /* Add inputs and outputs to the resource list. */
-   if (!add_interface_variables(shProg, resource_set,
+   if (!add_interface_variables(ctx, shProg, resource_set,
                                input_stage, GL_PROGRAM_INPUT))
      return;

-   if (!add_interface_variables(shProg, resource_set,
+   if (!add_interface_variables(ctx, shProg, resource_set,
                                output_stage, GL_PROGRAM_OUTPUT))
      return;

@@ -4743,14 +4808,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                   "type of shader\n");
   }

-   for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (prog->_LinkedShaders[i] != NULL) {
-         _mesa_delete_linked_shader(ctx, prog->_LinkedShaders[i]);
-      }
-
-      prog->_LinkedShaders[i] = NULL;
-   }
-
   /* Link all shaders for a particular stage and validate the result.
    */
   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
--- a/src/compiler/glsl/lower_blend_equation_advanced.cpp
+++ b/src/compiler/glsl/lower_blend_equation_advanced.cpp
@@ -308,12 +308,18 @@ calc_blend_result(ir_factory f,
   f.emit(assign(dst_alpha, swizzle_w(fb)));
   f.emit(if_tree(equal(dst_alpha, imm1(0)),
                     assign(dst_rgb, imm3(0)),
-                     assign(dst_rgb, div(swizzle_xyz(fb), dst_alpha))));
+                     assign(dst_rgb, csel(equal(swizzle_xyz(fb),
+                                                swizzle(fb, SWIZZLE_WWWW, 3)),
+                                          imm3(1),
+                                          div(swizzle_xyz(fb), dst_alpha)))));

   f.emit(assign(src_alpha, swizzle_w(src)));
   f.emit(if_tree(equal(src_alpha, imm1(0)),
                     assign(src_rgb, imm3(0)),
-                     assign(src_rgb, div(swizzle_xyz(src), src_alpha))));
+                     assign(src_rgb, csel(equal(swizzle_xyz(src),
+                                                swizzle(src, SWIZZLE_WWWW, 3)),
+                                          imm3(1),
+                                          div(swizzle_xyz(src), src_alpha)))));

   ir_variable *factor = f.make_temp(glsl_type::vec3_type, "__blend_factor");

--- a/src/compiler/glsl/lower_named_interface_blocks.cpp
+++ b/src/compiler/glsl/lower_named_interface_blocks.cpp
@@ -193,6 +193,8 @@ flatten_named_interface_blocks_declarations::run(exec_list *instructions)
            new_var->data.patch = iface_t->fields.structure[i].patch;
            new_var->data.stream = var->data.stream;
            new_var->data.how_declared = var->data.how_declared;
+            new_var->data.tess_varying_implicit_sized_array =
+               var->data.tess_varying_implicit_sized_array;
            new_var->data.from_named_ifc_block = 1;

            new_var->init_interface_type(var->type);
--- a/src/compiler/glsl/lower_output_reads.cpp
+++ b/src/compiler/glsl/lower_output_reads.cpp
@@ -157,7 +157,6 @@ ir_visitor_status
 output_read_remover::visit_leave(ir_emit_vertex *ir)
 {
   hash_table_call_foreach(replacements, emit_return_copy, ir);
-   _mesa_hash_table_clear(replacements, NULL);
   return visit_continue;
 }

--- a/src/compiler/glsl/lower_ubo_reference.cpp
+++ b/src/compiler/glsl/lower_ubo_reference.cpp
@@ -107,7 +107,6 @@ public:

   struct gl_linked_shader *shader;
   bool clamp_block_indices;
-   struct gl_uniform_buffer_variable *ubo_var;
   const struct glsl_struct_field *struct_field;
   ir_variable *variable;
   ir_rvalue *uniform_block;
@@ -308,8 +307,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
            this->uniform_block = index;
         }

-         this->ubo_var = var->is_interface_instance()
-            ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];
+         if (var->is_interface_instance()) {
+            *const_offset = 0;
+         } else {
+            *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
+         }

         break;
      }
@@ -317,8 +319,6 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,

   assert(this->uniform_block);

-   *const_offset = ubo_var->Offset;
-
   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
--- a/src/compiler/glsl/opt_function_inlining.cpp
+++ b/src/compiler/glsl/opt_function_inlining.cpp
@@ -128,7 +128,7 @@ ir_call::generate_inline(ir_instruction *next_ir)
 	 parameters[i] = NULL;
      } else {
 	 parameters[i] = sig_param->clone(ctx, ht);
-	 parameters[i]->data.mode = ir_var_auto;
+	 parameters[i]->data.mode = ir_var_temporary;

 	 /* Remove the read-only decoration because we're going to write
 	  * directly to this variable.  If the cloned variable is left
--- a/src/compiler/glsl/opt_minmax.cpp
+++ b/src/compiler/glsl/opt_minmax.cpp
@@ -355,7 +355,7 @@ ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
          */
         if (!is_redundant && limits[i].low && baserange.high) {
            cr = compare_components(limits[i].low, baserange.high);
-            if (cr >= EQUAL && cr != MIXED)
+            if (cr > EQUAL && cr != MIXED)
               is_redundant = true;
         }
      } else {
@@ -373,7 +373,7 @@ ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
          */
         if (!is_redundant && limits[i].high && baserange.low) {
            cr = compare_components(limits[i].high, baserange.low);
-            if (cr <= EQUAL)
+            if (cr < EQUAL)
               is_redundant = true;
         }
      }
--- a/src/compiler/glsl/standalone.cpp
+++ b/src/compiler/glsl/standalone.cpp
@@ -421,7 +421,7 @@ standalone_compile_shader(const struct standalone_options *_options,
   }

   if ((status == EXIT_SUCCESS) && options->do_link)  {
-      _mesa_clear_shader_program_data(whole_program);
+      _mesa_clear_shader_program_data(ctx, whole_program);

      link_shaders(ctx, whole_program);
      status = (whole_program->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE;
--- a/src/compiler/glsl/standalone_scaffolding.cpp
+++ b/src/compiler/glsl/standalone_scaffolding.cpp
@@ -123,8 +123,16 @@ _mesa_delete_linked_shader(struct gl_context *ctx,
 }

 void
-_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
+_mesa_clear_shader_program_data(struct gl_context *ctx,
+                                struct gl_shader_program *shProg)
 {
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (shProg->_LinkedShaders[i] != NULL) {
+         _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[i]);
+         shProg->_LinkedShaders[i] = NULL;
+      }
+   }
+
   shProg->NumUniformStorage = 0;
   shProg->UniformStorage = NULL;
   shProg->NumUniformRemapTable = 0;
--- a/src/compiler/glsl/standalone_scaffolding.h
+++ b/src/compiler/glsl/standalone_scaffolding.h
@@ -56,7 +56,8 @@ _mesa_delete_linked_shader(struct gl_context *ctx,
                           struct gl_linked_shader *sh);

 extern "C" void
-_mesa_clear_shader_program_data(struct gl_shader_program *);
+_mesa_clear_shader_program_data(struct gl_context *ctx,
+                                struct gl_shader_program *);

 extern "C" void
 _mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2625,6 +2625,8 @@ bool nir_opt_remove_phis(nir_shader *shader);

 bool nir_opt_undef(nir_shader *shader);

+bool nir_opt_conditional_discard(nir_shader *shader);
+
 void nir_sweep(nir_shader *shader);

 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
--- a/src/compiler/nir/nir_lower_wpos_ytransform.c
+++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
@@ -272,6 +272,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state *state,
                                                     flip_y)));
 }

+static void
+lower_load_sample_pos(lower_wpos_ytransform_state *state,
+                      nir_intrinsic_instr *intr)
+{
+   nir_builder *b = &state->b;
+   b->cursor = nir_after_instr(&intr->instr);
+
+   nir_ssa_def *pos = &intr->dest.ssa;
+   nir_ssa_def *scale = nir_channel(b, get_transform(state), 0);
+   nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2);
+   /* Either y or 1-y for scale equal to 1 or -1 respectively. */
+   nir_ssa_def *flipped_y =
+               nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)),
+                        nir_fmul(b, nir_channel(b, pos, 1), scale));
+   nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y);
+
+   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos),
+                                  flipped_pos->parent_instr);
+}
+
 static void
 lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block)
 {
@@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block
               /* gl_FragCoord should not have array/struct deref's: */
               assert(dvar->deref.child == NULL);
               lower_fragcoord(state, intr);
+            } else if (var->data.mode == nir_var_system_value &&
+                       var->data.location == SYSTEM_VALUE_SAMPLE_POS) {
+               assert(dvar->deref.child == NULL);
+               lower_load_sample_pos(state, intr);
            }
         } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) {
            lower_interp_var_at_offset(state, intr);
--- a/src/compiler/nir/nir_opt_conditional_discard.c
+++ b/src/compiler/nir/nir_opt_conditional_discard.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2016 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_opt_conditional_discard.c
+ *
+ * Optimizes "if (cond) discard" into a single discard_if(cond).
+ */
+
+/* Replace an "if (cond) discard" that immediately precedes \p block with a
+ * single discard_if emitted before the if.  \p b must have been initialised
+ * for the function impl that owns \p block.  Returns true on progress.
+ */
+static bool
+nir_opt_conditional_discard_block(nir_builder *b, nir_block *block)
+{
+   if (nir_cf_node_is_first(&block->cf_node))
+      return false;
+
+   nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
+   if (prev_node->type != nir_cf_node_if)
+      return false;
+
+   nir_if *if_stmt = nir_cf_node_as_if(prev_node);
+   nir_block *then_block = nir_if_first_then_block(if_stmt);
+   nir_block *else_block = nir_if_first_else_block(if_stmt);
+
+   /* check there is only one else block and it is empty */
+   if (nir_if_last_else_block(if_stmt) != else_block)
+      return false;
+   if (!exec_list_is_empty(&else_block->instr_list))
+      return false;
+
+   /* check there is only one then block and it has only one instruction in it */
+   if (nir_if_last_then_block(if_stmt) != then_block)
+      return false;
+   if (exec_list_length(&then_block->instr_list) != 1)
+      return false;
+   /* make sure no subsequent phi nodes point at this if. */
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+   nir_foreach_instr_safe(instr, after) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_foreach_phi_src(phi_src, phi) {
+         if (phi_src->pred == then_block ||
+             phi_src->pred == else_block)
+            return false;
+      }
+   }
+
+   /* Get the first instruction in the then block and confirm it is
+    * a discard or a discard_if
+    */
+   nir_instr *instr = nir_block_first_instr(then_block);
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+   if (intrin->intrinsic != nir_intrinsic_discard &&
+       intrin->intrinsic != nir_intrinsic_discard_if)
+      return false;
+
+   /* A nested discard_if only fires when both conditions hold, so AND them. */
+   nir_src cond;
+   b->cursor = nir_before_cf_node(prev_node);
+   if (intrin->intrinsic == nir_intrinsic_discard)
+      cond = if_stmt->condition;
+   else
+      cond = nir_src_for_ssa(nir_iand(b,
+                                      nir_ssa_for_src(b, if_stmt->condition, 1),
+                                      nir_ssa_for_src(b, intrin->src[0], 1)));
+
+   nir_intrinsic_instr *discard_if =
+      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
+   nir_src_copy(&discard_if->src[0], &cond, discard_if);
+
+   nir_instr_insert_before_cf(prev_node, &discard_if->instr);
+   nir_instr_remove(&intrin->instr);
+   nir_cf_node_remove(&if_stmt->cf_node);
+
+   return true;
+}
+
+/** Run the conditional-discard optimization on every function of \p shader. */
+bool
+nir_opt_conditional_discard(nir_shader *shader)
+{
+   bool progress = false;
+   nir_builder builder;
+
+   nir_foreach_function(function, shader) {
+      if (function->impl) {
+         /* The builder must be set up from the impl, not a ralloc context. */
+         nir_builder_init(&builder, function->impl);
+         nir_foreach_block_safe(block, function->impl) {
+            progress |= nir_opt_conditional_discard_block(&builder, block);
+         }
+      }
+   }
+   return progress;
+}
--- a/src/compiler/nir/nir_opt_undef.c
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -86,17 +86,15 @@ opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)

   assert(alu->dest.dest.is_ssa);

-   unsigned num_components = nir_op_infos[alu->op].num_inputs;
-
-   for (unsigned i = 0; i < num_components; i++) {
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (!alu->src[i].src.is_ssa ||
          alu->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
         return false;
   }

   b->cursor = nir_before_instr(&alu->instr);
-   nir_ssa_def *undef =
-      nir_ssa_undef(b, num_components, nir_dest_bit_size(alu->dest.dest));
+   nir_ssa_def *undef = nir_ssa_undef(b, alu->dest.dest.ssa.num_components,
+                                      nir_dest_bit_size(alu->dest.dest));
   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(undef));

   return true;
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -98,6 +98,16 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
 {
   uint8_t new_swizzle[4];

+   /* Searching only works on SSA values because, if it's not SSA, we can't
+    * know if the value changed between one instance of that value in the
+    * expression and another.  Also, the replace operation will place reads of
+    * that value right before the last instruction in the expression we're
+    * replacing so those reads will happen after the original reads and may
+    * not be valid if they're register reads.
+    */
+   if (!instr->src[src].src.is_ssa)
+      return false;
+
   /* If the source is an explicitly sized source, then we need to reset
    * both the number of components and the swizzle.
    */
@@ -116,9 +126,6 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,

   switch (value->type) {
   case nir_search_value_expression:
-      if (!instr->src[src].src.is_ssa)
-         return false;
-
      if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
         return false;

@@ -131,8 +138,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
      assert(var->variable < NIR_SEARCH_MAX_VARIABLES);

      if (state->variables_seen & (1 << var->variable)) {
-         if (!nir_srcs_equal(state->variables[var->variable].src,
-                             instr->src[src].src))
+         if (state->variables[var->variable].src.ssa != instr->src[src].src.ssa)
            return false;

         assert(!instr->src[src].abs && !instr->src[src].negate);
--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -527,12 +527,13 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
   nir_variable *phi_var = phi_entry->data;

   for (unsigned i = 3; i < count; i += 2) {
-      struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
      struct vtn_block *pred =
         vtn_value(b, w[i + 1], vtn_value_type_block)->block;

      b->nb.cursor = nir_after_instr(&pred->end_nop->instr);

+      struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
+
      vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
   }

--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -565,16 +565,21 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
                                   build_exp(nb, nir_fneg(nb, src[0]))));
      return;

-   case GLSLstd450Tanh:
-      /* (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x))) */
-      val->ssa->def =
-         nir_fdiv(nb, nir_fmul(nb, nir_imm_float(nb, 0.5f),
-                                   nir_fsub(nb, build_exp(nb, src[0]),
-                                                build_exp(nb, nir_fneg(nb, src[0])))),
-                      nir_fmul(nb, nir_imm_float(nb, 0.5f),
-                                   nir_fadd(nb, build_exp(nb, src[0]),
-                                                build_exp(nb, nir_fneg(nb, src[0])))));
+   case GLSLstd450Tanh: {
+      /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
+       *
+       * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
+       *
+       * We clamp x to (-inf, +10] to avoid precision problems.  When x > 10,
+       * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
+       * computation e^2x +/- 1 so it can be ignored.
+       */
+      nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
+      nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
+      val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
+                                   nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
      return;
+   }

   case GLSLstd450Asinh:
      val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -805,8 +805,12 @@ vtn_get_builtin_location(struct vtn_builder *b,
      set_mode_system_value(mode);
      break;
   case SpvBuiltInPrimitiveId:
-      *location = VARYING_SLOT_PRIMITIVE_ID;
-      *mode = nir_var_shader_out;
+      if (*mode == nir_var_shader_out) {
+         *location = VARYING_SLOT_PRIMITIVE_ID;
+      } else {
+         *location = SYSTEM_VALUE_PRIMITIVE_ID;
+         set_mode_system_value(mode);
+      }
      break;
   case SpvBuiltInInvocationId:
      *location = SYSTEM_VALUE_INVOCATION_ID;
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -241,6 +241,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
            return NULL;
         break;

+      case __DRI_ATTRIB_MAX_PBUFFER_WIDTH:
+         _eglSetConfigKey(&base, EGL_MAX_PBUFFER_WIDTH,
+                          _EGL_MAX_PBUFFER_WIDTH);
+         break;
+      case __DRI_ATTRIB_MAX_PBUFFER_HEIGHT:
+         _eglSetConfigKey(&base, EGL_MAX_PBUFFER_HEIGHT,
+                          _EGL_MAX_PBUFFER_HEIGHT);
+         break;
+
      default:
         key = dri2_to_egl_attribute_map[attrib];
         if (key != 0)
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -80,8 +80,6 @@
 #include "eglimage.h"
 #include "eglsync.h"

-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
 struct wl_buffer;

 struct dri2_egl_driver
--- a/src/egl/drivers/dri2/egl_dri2_fallbacks.h
+++ b/src/egl/drivers/dri2/egl_dri2_fallbacks.h
@@ -66,7 +66,8 @@ dri2_fallback_swap_buffers_with_damage(_EGLDriver *drv, _EGLDisplay *dpy,
                                      _EGLSurface *surf,
                                      const EGLint *rects, EGLint n_rects)
 {
-   return EGL_FALSE;
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(dpy);
+   return dri2_dpy->vtbl->swap_buffers(drv, dpy, surf);
 }

 static inline EGLBoolean
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -766,8 +766,6 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
     EGL_NATIVE_VISUAL_TYPE, 0,
     EGL_FRAMEBUFFER_TARGET_ANDROID, EGL_TRUE,
     EGL_RECORDABLE_ANDROID, EGL_TRUE,
-     EGL_MAX_PBUFFER_WIDTH, _EGL_MAX_PBUFFER_WIDTH,
-     EGL_MAX_PBUFFER_HEIGHT, _EGL_MAX_PBUFFER_HEIGHT,
     EGL_NONE
   };
   unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -118,6 +118,13 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
   (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);
 }

+static void
+destroy_window_callback(void *data)
+{
+   struct dri2_egl_surface *dri2_surf = data;
+   dri2_surf->wl_win = NULL;
+}
+
 /**
 * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
 */
@@ -159,6 +166,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,

   dri2_surf->wl_win->private = dri2_surf;
   dri2_surf->wl_win->resize_callback = resize_callback;
+   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;

   dri2_surf->base.Width =  -1;
   dri2_surf->base.Height = -1;
@@ -254,8 +262,11 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
   if (dri2_surf->throttle_callback)
      wl_callback_destroy(dri2_surf->throttle_callback);

-   dri2_surf->wl_win->private = NULL;
-   dri2_surf->wl_win->resize_callback = NULL;
+   if (dri2_surf->wl_win) {
+      dri2_surf->wl_win->private = NULL;
+      dri2_surf->wl_win->resize_callback = NULL;
+      dri2_surf->wl_win->destroy_window_callback = NULL;
+   }

   free(surf);

@@ -1272,6 +1283,8 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
 cleanup_registry:
   wl_registry_destroy(dri2_dpy->wl_registry);
   wl_event_queue_destroy(dri2_dpy->wl_queue);
+   if (disp->PlatformDisplay == NULL)
+      wl_display_disconnect(dri2_dpy->wl_dpy);
 cleanup_dpy:
   free(dri2_dpy);
   disp->DriverData = NULL;
@@ -1731,6 +1744,8 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
      dri2_surf->format = WL_SHM_FORMAT_ARGB8888;

   dri2_surf->wl_win = window;
+   dri2_surf->wl_win->private = dri2_surf;
+   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;

   dri2_surf->base.Width = -1;
   dri2_surf->base.Height = -1;
@@ -1913,6 +1928,8 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
 cleanup_registry:
   wl_registry_destroy(dri2_dpy->wl_registry);
   wl_event_queue_destroy(dri2_dpy->wl_queue);
+   if (disp->PlatformDisplay == NULL)
+      wl_display_disconnect(dri2_dpy->wl_dpy);
 cleanup_dpy:
   free(dri2_dpy);
   disp->DriverData = NULL;
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -734,7 +734,9 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list,

   _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);

-   if (!config && !disp->Extensions.KHR_no_config_context)
+   if (config != EGL_NO_CONFIG_KHR)
+      _EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv);
+   else if (!disp->Extensions.KHR_no_config_context)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT);

   if (!share && share_list != EGL_NO_CONTEXT)
@@ -847,7 +849,7 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
      RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_NO_SURFACE);

 #ifdef HAVE_SURFACELESS_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+   if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
      /* From the EGL_MESA_platform_surfaceless spec (v1):
       *
       *    eglCreatePlatformWindowSurface fails when called with a <display>
@@ -866,6 +868,9 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config,

   _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);

+   if ((conf->SurfaceType & EGL_WINDOW_BIT) == 0)
+      RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+
   surf = drv->API.CreateWindowSurface(drv, disp, conf, native_window,
                                       attrib_list);
   ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
@@ -968,7 +973,7 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
   EGLSurface ret;

 #if HAVE_SURFACELESS_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_SURFACELESS) {
+   if (disp && disp->Platform == _EGL_PLATFORM_SURFACELESS) {
      /* From the EGL_MESA_platform_surfaceless spec (v1):
       *
       *   [Like eglCreatePlatformWindowSurface,] eglCreatePlatformPixmapSurface
@@ -984,6 +989,10 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
 #endif

   _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
+
+   if ((conf->SurfaceType & EGL_PIXMAP_BIT) == 0)
+      RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+
   surf = drv->API.CreatePixmapSurface(drv, disp, conf, native_pixmap,
                                       attrib_list);
   ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
@@ -1054,6 +1063,9 @@ eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
   _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE);
   _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);

+   if ((conf->SurfaceType & EGL_PBUFFER_BIT) == 0)
+      RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
+
   surf = drv->API.CreatePbufferSurface(drv, disp, conf, attrib_list);
   ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;

@@ -2382,7 +2394,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context,
   return MESA_GLINTEROP_SUCCESS;
 }

-int
+PUBLIC int
 MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
                                struct mesa_glinterop_device_info *out)
 {
@@ -2404,7 +2416,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
   return ret;
 }

-int
+PUBLIC int
 MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
                             struct mesa_glinterop_export_in *in,
                             struct mesa_glinterop_export_out *out)
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -184,19 +184,33 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
            break;
         }

-         /* The EGL_KHR_create_context_spec says:
-          *
-          *     "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
-          *     EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
-          *     access> will be created. Robust buffer access is defined in the
-          *     GL_ARB_robustness extension specification, and the resulting
-          *     context must also support either the GL_ARB_robustness
-          *     extension, or a version of OpenGL incorporating equivalent
-          *     functionality. This bit is supported for OpenGL contexts.
-          */
         if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
-             (api != EGL_OPENGL_API ||
-              !dpy->Extensions.EXT_create_context_robustness)) {
+             api != EGL_OPENGL_API) {
+            /* The EGL_KHR_create_context spec says:
+             *
+             *   10) Which error should be generated if robust buffer access
+             *       or reset notifications are requested under OpenGL ES?
+             *
+             *       As per Issue 6, this extension does not support creating
+             *       robust contexts for OpenGL ES. This is only supported via
+             *       the EGL_EXT_create_context_robustness extension.
+             *
+             *       Attempting to use this extension to create robust OpenGL
+             *       ES context will generate an EGL_BAD_ATTRIBUTE error. This
+             *       specific error is generated because this extension does
+             *       not define the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR
+             *       and EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR
+             *       bits for OpenGL ES contexts. Thus, use of these bits fall
+             *       under condition described by: "If an attribute is
+             *       specified that is not meaningful for the client API
+             *       type.." in the above specification.
+             *
+             * The spec requires that we emit the error even if the display
+             * supports EGL_EXT_create_context_robustness. To create a robust
+             * GLES context, the *attribute*
+             * EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT must be used, not the
+             * *flag* EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR.
+             */
            err = EGL_BAD_ATTRIBUTE;
            break;
         }
--- a/src/egl/main/egldefines.h
+++ b/src/egl/main/egldefines.h
@@ -34,6 +34,8 @@
 #ifndef EGLDEFINES_INCLUDED
 #define EGLDEFINES_INCLUDED

+#include "util/macros.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -48,9 +50,6 @@ extern "C" {

 #define _EGL_VENDOR_STRING "Mesa Project"

-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-#define MIN2(A, B)  (((A) < (B)) ? (A) : (B))
-
 #ifdef __cplusplus
 }
 #endif
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -262,9 +262,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
 {
   const char *func;
   EGLint renderBuffer = EGL_BACK_BUFFER;
-   EGLint swapBehavior = EGL_BUFFER_PRESERVED;
+   EGLint swapBehavior = EGL_BUFFER_DESTROYED;
   EGLint err;

+   /* Swap behavior can be preserved only if config supports this. */
+   if (conf->SurfaceType & EGL_SWAP_BEHAVIOR_PRESERVED_BIT)
+      swapBehavior = EGL_BUFFER_PRESERVED;
+
   switch (type) {
   case EGL_WINDOW_BIT:
      func = "eglCreateWindowSurface";
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -188,7 +188,9 @@ cso_insert_state(struct cso_cache *sc,
                 void *state)
 {
   struct cso_hash *hash = _cso_hash_for_type(sc, type);
-   sanitize_hash(sc, hash, type, sc->max_size);
+
+   if (type != CSO_SAMPLER)
+      sanitize_hash(sc, hash, type, sc->max_size);

   return cso_hash_insert(hash, hash_key, state);
 }
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1275,7 +1275,6 @@ cso_restore_fragment_samplers(struct cso_context *ctx)
 {
   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];

-   info->nr_samplers = ctx->nr_fragment_samplers_saved;
   memcpy(info->samplers, ctx->fragment_samplers_saved,
          sizeof(info->samplers));
   cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
--- a/src/gallium/auxiliary/hud/hud_cpufreq.c
+++ b/src/gallium/auxiliary/hud/hud_cpufreq.c
@@ -36,6 +36,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -61,6 +62,7 @@ struct cpufreq_info

 static int gcpufreq_count = 0;
 static struct list_head gcpufreq_list;
+pipe_static_mutex(gcpufreq_mutex);

 static struct cpufreq_info *
 find_cfi_by_index(int cpu_index, int mode)
@@ -112,14 +114,6 @@ query_cfi_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct cpufreq_info *cfi = (struct cpufreq_info *)p;
-   list_del(&cfi->list);
-   FREE(cfi);
-}
-
 /**
  * Create and initialize a new object for a specific CPU.
  * \param  pane  parent context.
@@ -155,6 +149,7 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
      break;
   case CPUFREQ_MAXIMUM:
      snprintf(gr->name, sizeof(gr->name), "%s-Max", cfi->name);
+      break;
   default:
      return;
   }
@@ -162,11 +157,6 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
   gr->query_data = cfi;
   gr->query_new_value = query_cfi_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   hud_pane_set_max_value(pane, 3000000 /* 3 GHz */);
 }
@@ -199,16 +189,21 @@ hud_get_num_cpufreq(bool displayhelp)
   int cpu_index;

   /* Return the number of CPU metrics we support. */
-   if (gcpufreq_count)
+   pipe_mutex_lock(gcpufreq_mutex);
+   if (gcpufreq_count) {
+      pipe_mutex_unlock(gcpufreq_mutex);
      return gcpufreq_count;
+   }

   /* Scan /sys/devices.../cpu, for every object type we support, create
    * and persist an object to represent its different metrics.
    */
   list_inithead(&gcpufreq_list);
   DIR *dir = opendir("/sys/devices/system/cpu");
-   if (!dir)
+   if (!dir) {
+      pipe_mutex_unlock(gcpufreq_mutex);
      return 0;
+   }

   while ((dp = readdir(dir)) != NULL) {

@@ -238,6 +233,7 @@ hud_get_num_cpufreq(bool displayhelp)
      snprintf(fn, sizeof(fn), "%s/cpufreq/scaling_max_freq", basename);
      add_object(dp->d_name, fn, CPUFREQ_MAXIMUM, cpu_index);
   }
+   closedir(dir);

   if (displayhelp) {
      list_for_each_entry(struct cpufreq_info, cfi, &gcpufreq_list, list) {
@@ -251,6 +247,7 @@ hud_get_num_cpufreq(bool displayhelp)
      }
   }

+   pipe_mutex_unlock(gcpufreq_mutex);
   return gcpufreq_count;
 }

--- a/src/gallium/auxiliary/hud/hud_diskstat.c
+++ b/src/gallium/auxiliary/hud/hud_diskstat.c
@@ -35,6 +35,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -81,6 +82,7 @@ struct diskstat_info
 */
 static int gdiskstat_count = 0;
 static struct list_head gdiskstat_list;
+pipe_static_mutex(gdiskstat_mutex);

 static struct diskstat_info *
 find_dsi_by_name(const char *n, int mode)
@@ -162,14 +164,6 @@ query_dsi_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct diskstat_info *nic = (struct diskstat_info *) p;
-   list_del(&nic->list);
-   FREE(nic);
-}
-
 /**
  * Create and initialize a new object for a specific block I/O device.
  * \param  pane  parent context.
@@ -208,11 +202,6 @@ hud_diskstat_graph_install(struct hud_pane *pane, const char *dev_name,
   gr->query_data = dsi;
   gr->query_new_value = query_dsi_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   hud_pane_set_max_value(pane, 100);
 }
@@ -257,16 +246,21 @@ hud_get_num_disks(bool displayhelp)
   char name[64];

   /* Return the number of block devices and partitions. */
-   if (gdiskstat_count)
+   pipe_mutex_lock(gdiskstat_mutex);
+   if (gdiskstat_count) {
+      pipe_mutex_unlock(gdiskstat_mutex);
      return gdiskstat_count;
+   }

   /* Scan /sys/block, for every object type we support, create and
    * persist an object to represent its different statistics.
    */
   list_inithead(&gdiskstat_list);
   DIR *dir = opendir("/sys/block/");
-   if (!dir)
+   if (!dir) {
+      pipe_mutex_unlock(gdiskstat_mutex);
      return 0;
+   }

   while ((dp = readdir(dir)) != NULL) {

@@ -290,8 +284,11 @@ hud_get_num_disks(bool displayhelp)
      /* Add any partitions */
      struct dirent *dpart;
      DIR *pdir = opendir(basename);
-      if (!pdir)
+      if (!pdir) {
+         pipe_mutex_unlock(gdiskstat_mutex);
+         closedir(dir);
         return 0;
+      }

      while ((dpart = readdir(pdir)) != NULL) {
         /* Avoid 'lo' and '..' and '.' */
@@ -311,6 +308,7 @@ hud_get_num_disks(bool displayhelp)
         add_object_part(basename, dpart->d_name, DISKSTAT_WR);
      }
   }
+   closedir(dir);

   if (displayhelp) {
      list_for_each_entry(struct diskstat_info, dsi, &gdiskstat_list, list) {
@@ -322,6 +320,7 @@ hud_get_num_disks(bool displayhelp)
         puts(line);
      }
   }
+   pipe_mutex_unlock(gdiskstat_mutex);

   return gdiskstat_count;
 }
--- a/src/gallium/auxiliary/hud/hud_nic.c
+++ b/src/gallium/auxiliary/hud/hud_nic.c
@@ -35,6 +35,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -66,6 +67,7 @@ struct nic_info
 */
 static int gnic_count = 0;
 static struct list_head gnic_list;
+pipe_static_mutex(gnic_mutex);

 static struct nic_info *
 find_nic_by_name(const char *n, int mode)
@@ -234,14 +236,6 @@ query_nic_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct nic_info *nic = (struct nic_info *) p;
-   list_del(&nic->list);
-   FREE(nic);
-}
-
 /**
  * Create and initialize a new object for a specific network interface dev.
  * \param  pane  parent context.
@@ -284,11 +278,6 @@ hud_nic_graph_install(struct hud_pane *pane, const char *nic_name,
   gr->query_data = nic;
   gr->query_new_value = query_nic_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   hud_pane_set_max_value(pane, 100);
 }
@@ -342,16 +331,21 @@ hud_get_num_nics(bool displayhelp)
   char name[64];

   /* Return the number if network interfaces. */
-   if (gnic_count)
+   pipe_mutex_lock(gnic_mutex);
+   if (gnic_count) {
+      pipe_mutex_unlock(gnic_mutex);
      return gnic_count;
+   }

   /* Scan /sys/block, for every object type we support, create and
    * persist an object to represent its different statistics.
    */
   list_inithead(&gnic_list);
   DIR *dir = opendir("/sys/class/net/");
-   if (!dir)
+   if (!dir) {
+      pipe_mutex_unlock(gnic_mutex);
      return 0;
+   }

   while ((dp = readdir(dir)) != NULL) {

@@ -412,6 +406,7 @@ hud_get_num_nics(bool displayhelp)
      }

   }
+   closedir(dir);

   list_for_each_entry(struct nic_info, nic, &gnic_list, list) {
      char line[64];
@@ -424,6 +419,7 @@ hud_get_num_nics(bool displayhelp)

   }

+   pipe_mutex_unlock(gnic_mutex);
   return gnic_count;
 }

--- a/src/gallium/auxiliary/hud/hud_sensors_temp.c
+++ b/src/gallium/auxiliary/hud/hud_sensors_temp.c
@@ -32,6 +32,7 @@
 #include "hud/hud_private.h"
 #include "util/list.h"
 #include "os/os_time.h"
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <unistd.h>
@@ -49,6 +50,7 @@
 */
 static int gsensors_temp_count = 0;
 static struct list_head gsensors_temp_list;
+pipe_static_mutex(gsensor_temp_mutex);

 struct sensors_temp_info
 {
@@ -189,17 +191,6 @@ query_sti_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   struct sensors_temp_info *sti = (struct sensors_temp_info *) p;
-   list_del(&sti->list);
-   if (sti->chip)
-      sensors_free_chip_name(sti->chip);
-   FREE(sti);
-   sensors_cleanup();
-}
-
 /**
  * Create and initialize a new object for a specific sensor interface dev.
  * \param  pane  parent context.
@@ -237,11 +228,6 @@ hud_sensors_temp_graph_install(struct hud_pane *pane, const char *dev_name,
   gr->query_data = sti;
   gr->query_new_value = query_sti_load;

-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
-
   hud_pane_add_graph(pane, gr);
   switch (sti->mode) {
   case SENSORS_TEMP_CURRENT:
@@ -338,12 +324,17 @@ int
 hud_get_num_sensors(bool displayhelp)
 {
   /* Return the number of sensors detected. */
-   if (gsensors_temp_count)
+   pipe_mutex_lock(gsensor_temp_mutex);
+   if (gsensors_temp_count) {
+      pipe_mutex_unlock(gsensor_temp_mutex);
      return gsensors_temp_count;
+   }

   int ret = sensors_init(NULL);
-   if (ret)
+   if (ret) {
+      pipe_mutex_unlock(gsensor_temp_mutex);
      return 0;
+   }

   list_inithead(&gsensors_temp_list);

@@ -377,6 +368,7 @@ hud_get_num_sensors(bool displayhelp)
      }
   }

+   pipe_mutex_unlock(gsensor_temp_mutex);
   return gsensors_temp_count;
 }

--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -485,6 +485,7 @@ tgsi_opcode_infer_src_type( uint opcode )
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_UP2H:
   case TGSI_OPCODE_U2I64:
+   case TGSI_OPCODE_MEMBAR:
      return TGSI_TYPE_UNSIGNED;
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_I2F:
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -152,7 +152,7 @@ create_vert_shader(struct vl_zscan *zscan)
   for (i = 0; i < zscan->num_channels; ++i) {
      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
               ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
-                * (i - (signed)zscan->num_channels / 2)));
+                * ((signed)i - (signed)zscan->num_channels / 2)));

      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
--- a/src/gallium/drivers/freedreno/freedreno_batch.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch.c
@@ -234,7 +234,6 @@ batch_flush_func(void *job, int id)

 	fd_gmem_render_tiles(batch);
 	batch_reset_resources(batch);
-	batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
 }

 static void
@@ -275,7 +274,6 @@ batch_flush(struct fd_batch *batch)
 	} else {
 		fd_gmem_render_tiles(batch);
 		batch_reset_resources(batch);
-		batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
 	}

 	debug_assert(batch->reference.count > 0);
--- a/src/gallium/drivers/freedreno/freedreno_batch_cache.c
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.c
@@ -124,7 +124,7 @@ fd_bc_fini(struct fd_batch_cache *cache)
 	_mesa_hash_table_destroy(cache->ht, NULL);
 }

-uint32_t
+void
 fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
 {
 	struct hash_entry *entry;
@@ -150,8 +150,6 @@ fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
 		fd_batch_sync(last_batch);
 		fd_batch_reference(&last_batch, NULL);
 	}
-
-	return ctx->last_fence;
 }

 void
--- a/src/gallium/drivers/freedreno/freedreno_batch_cache.h
+++ b/src/gallium/drivers/freedreno/freedreno_batch_cache.h
@@ -62,7 +62,7 @@ struct fd_batch_cache {
 void fd_bc_init(struct fd_batch_cache *cache);
 void fd_bc_fini(struct fd_batch_cache *cache);

-uint32_t fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx);
+void fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx);

 void fd_bc_invalidate_context(struct fd_context *ctx);
 void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy);
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -43,22 +43,15 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
 		unsigned flags)
 {
 	struct fd_context *ctx = fd_context(pctx);
-	uint32_t timestamp;

 	if (!ctx->screen->reorder) {
-		struct fd_batch *batch = NULL;
-		fd_batch_reference(&batch, ctx->batch);
-		fd_batch_flush(batch, true);
-		timestamp = fd_ringbuffer_timestamp(batch->gmem);
-		fd_batch_reference(&batch, NULL);
+		fd_batch_flush(ctx->batch, true);
 	} else {
-		timestamp = fd_bc_flush(&ctx->screen->batch_cache, ctx);
+		fd_bc_flush(&ctx->screen->batch_cache, ctx);
 	}

-	if (fence) {
-		fd_screen_fence_ref(pctx->screen, fence, NULL);
-		*fence = fd_fence_create(pctx, timestamp);
-	}
+	if (fence)
+		fd_fence_ref(pctx->screen, fence, ctx->last_fence);
 }

 /**
@@ -109,6 +102,8 @@ fd_context_destroy(struct pipe_context *pctx)
 	fd_batch_reference(&ctx->batch, NULL);  /* unref current batch */
 	fd_bc_invalidate_context(ctx);

+	fd_fence_ref(pctx->screen, &ctx->last_fence, NULL);
+
 	fd_prog_fini(pctx);
 	fd_hw_query_fini(pctx);

--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -164,7 +164,7 @@ struct fd_context {
 	 */
 	struct fd_batch *batch;

-	uint32_t last_fence;
+	struct pipe_fence_handle *last_fence;

 	/* Are we in process of shadowing a resource? Used to detect recursion
 	 * in transfer_map, and skip unneeded synchronization.
--- a/src/gallium/drivers/freedreno/freedreno_fence.c
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -40,7 +40,7 @@ struct pipe_fence_handle {
 };

 void
-fd_screen_fence_ref(struct pipe_screen *pscreen,
+fd_fence_ref(struct pipe_screen *pscreen,
 		struct pipe_fence_handle **ptr,
 		struct pipe_fence_handle *pfence)
 {
@@ -50,7 +50,7 @@ fd_screen_fence_ref(struct pipe_screen *pscreen,
 	*ptr = pfence;
 }

-boolean fd_screen_fence_finish(struct pipe_screen *screen,
+boolean fd_fence_finish(struct pipe_screen *pscreen,
 		struct pipe_context *ctx,
 		struct pipe_fence_handle *fence,
 		uint64_t timeout)
@@ -61,11 +61,10 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen,
 	return true;
 }

-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx,
 		uint32_t timestamp)
 {
 	struct pipe_fence_handle *fence;
-	struct fd_context *ctx = fd_context(pctx);

 	fence = CALLOC_STRUCT(pipe_fence_handle);
 	if (!fence)
--- a/src/gallium/drivers/freedreno/freedreno_fence.h
+++ b/src/gallium/drivers/freedreno/freedreno_fence.h
@@ -31,14 +31,16 @@

 #include "pipe/p_context.h"

-void fd_screen_fence_ref(struct pipe_screen *pscreen,
+void fd_fence_ref(struct pipe_screen *pscreen,
 		struct pipe_fence_handle **ptr,
 		struct pipe_fence_handle *pfence);
-boolean fd_screen_fence_finish(struct pipe_screen *screen,
+boolean fd_fence_finish(struct pipe_screen *screen,
 		struct pipe_context *ctx,
 		struct pipe_fence_handle *pfence,
 		uint64_t timeout);
-struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
+
+struct fd_context;
+struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx,
 		uint32_t timestamp);

 #endif /* FREEDRENO_FENCE_H_ */
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -34,6 +34,7 @@

 #include "freedreno_gmem.h"
 #include "freedreno_context.h"
+#include "freedreno_fence.h"
 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
 #include "freedreno_util.h"
@@ -394,6 +395,9 @@ fd_gmem_render_tiles(struct fd_batch *batch)
 	}

 	fd_ringbuffer_flush(batch->gmem);
+
+	fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL);
+	ctx->last_fence = fd_fence_create(ctx, fd_ringbuffer_timestamp(batch->gmem));
 }

 /* tile needs restore if it isn't completely contained within the
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -696,8 +696,8 @@ fd_screen_create(struct fd_device *dev)

 	pscreen->get_timestamp = fd_screen_get_timestamp;

-	pscreen->fence_reference = fd_screen_fence_ref;
-	pscreen->fence_finish = fd_screen_fence_finish;
+	pscreen->fence_reference = fd_fence_ref;
+	pscreen->fence_finish = fd_fence_finish;

 	slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -727,7 +727,7 @@ void
 CodeEmitterGK110::emitIMAD(const Instruction *i)
 {
   uint8_t addOp =
-      (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
+      i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);

   emitForm_21(i, 0x100, 0xa00);

@@ -760,7 +760,7 @@ CodeEmitterGK110::emitISAD(const Instruction *i)
 void
 CodeEmitterGK110::emitSHLADD(const Instruction *i)
 {
-   uint8_t addOp = (i->src(2).mod.neg() << 1) | i->src(0).mod.neg();
+   uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
   const ImmediateValue *imm = i->src(1).get()->asImm();
   assert(imm);

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -737,7 +737,7 @@ void
 CodeEmitterNVC0::emitIMAD(const Instruction *i)
 {
   uint8_t addOp =
-      (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
+      i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);

   assert(i->encSize == 8);
   emitForm_A(i, HEX64(20000000, 00000003));
@@ -762,7 +762,7 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
 void
 CodeEmitterNVC0::emitSHLADD(const Instruction *i)
 {
-   uint8_t addOp = (i->src(2).mod.neg() << 1) | i->src(0).mod.neg();
+   uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(2).mod.neg();
   const ImmediateValue *imm = i->src(1).get()->asImm();
   assert(imm);

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -182,6 +182,7 @@ public:

   // mask of used components of source s
   unsigned int srcMask(unsigned int s) const;
+   unsigned int texOffsetMask() const;

   SrcRegister getSrc(unsigned int s) const
   {
@@ -234,6 +235,35 @@ private:
   const struct tgsi_full_instruction *insn;
 };

+unsigned int Instruction::texOffsetMask() const // component mask for texel-offset operands, picked from the texture target
+{
+   const struct tgsi_instruction_texture *tex = &insn->Texture;
+   assert(insn->Instruction.Texture); // caller must only use this on texture instructions
+
+   switch (tex->Texture) {
+   case TGSI_TEXTURE_BUFFER:
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+      return 0x1; // bit 0 only: first component
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_2D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_2D_MSAA:
+   case TGSI_TEXTURE_2D_ARRAY_MSAA:
+      return 0x3; // bits 0-1: first two components
+   case TGSI_TEXTURE_3D:
+      return 0x7; // bits 0-2: first three components
+   default:
+      assert(!"Unexpected texture target"); // any target not listed above is treated as a caller error
+      return 0xf; // reached only if the assert does not abort: all four components
+   }
+}
+
 unsigned int Instruction::srcMask(unsigned int s) const
 {
   unsigned int mask = insn->Dst[0].Register.WriteMask;
@@ -955,6 +985,9 @@ private:
   int inferSysValDirection(unsigned sn) const;
   bool scanDeclaration(const struct tgsi_full_declaration *);
   bool scanInstruction(const struct tgsi_full_instruction *);
+   void scanInstructionSrc(const Instruction& insn,
+                           const Instruction::SrcRegister& src,
+                           unsigned mask);
   void scanProperty(const struct tgsi_full_property *);
   void scanImmediate(const struct tgsi_full_immediate *);

@@ -1364,6 +1397,61 @@ inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
      insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
 }

+void Source::scanInstructionSrc(const Instruction& insn, // records in info / indirectTempArrays what reading src implies
+                                const Instruction::SrcRegister& src,
+                                unsigned mask) // bit c set => component c of src is considered read
+{
+   if (src.getFile() == TGSI_FILE_TEMPORARY) {
+      if (src.isIndirect(0))
+         indirectTempArrays.insert(src.getArrayId()); // remember temp arrays that are indexed indirectly
+   } else
+   if (src.getFile() == TGSI_FILE_BUFFER ||
+       src.getFile() == TGSI_FILE_IMAGE ||
+       (src.getFile() == TGSI_FILE_MEMORY &&
+        memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+      info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+         0x1 : 0x2; // 0x1 for LOAD, 0x2 for every other opcode (presumably read / write bits -- confirm)
+   } else
+   if (src.getFile() == TGSI_FILE_OUTPUT) {
+      if (src.isIndirect(0)) {
+         // We don't know which one is accessed, just mark everything for
+         // reading. This is an extremely unlikely occurrence.
+         for (unsigned i = 0; i < info->numOutputs; ++i)
+            info->out[i].oread = 1;
+      } else {
+         info->out[src.getIndex(0)].oread = 1; // directly addressed output: flag just that one as read
+      }
+   }
+   if (src.getFile() != TGSI_FILE_INPUT)
+      return; // everything below updates per-input component masks only
+
+   if (src.isIndirect(0)) {
+      for (unsigned i = 0; i < info->numInputs; ++i)
+         info->in[i].mask = 0xf; // indirect input read: overwrite (not OR) every input's mask with all components
+   } else {
+      const int i = src.getIndex(0);
+      for (unsigned c = 0; c < 4; ++c) {
+         if (!(mask & (1 << c)))
+            continue; // component c is not selected by the caller's mask
+         int k = src.getSwizzle(c);
+         if (k <= TGSI_SWIZZLE_W) // swizzle values above W do not mark any input component
+            info->in[i].mask |= 1 << k;
+      }
+      switch (info->in[i].sn) { // trim the accumulated mask for specific semantics
+      case TGSI_SEMANTIC_PSIZE:
+      case TGSI_SEMANTIC_PRIMID:
+      case TGSI_SEMANTIC_FOG:
+         info->in[i].mask &= 0x1; // keep bit 0 only
+         break;
+      case TGSI_SEMANTIC_PCOORD:
+         info->in[i].mask &= 0x3; // keep bits 0-1 only
+         break;
+      default:
+         break;
+      }
+   }
+}
+
 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
 {
   Instruction insn(inst);
@@ -1396,66 +1484,19 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
            indirectTempArrays.insert(dst.getArrayId());
      } else
      if (dst.getFile() == TGSI_FILE_BUFFER ||
-          dst.getFile() == TGSI_FILE_IMAGE || 
+          dst.getFile() == TGSI_FILE_IMAGE ||
          (dst.getFile() == TGSI_FILE_MEMORY &&
           memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
         info->io.globalAccess |= 0x2;
      }
   }

-   for (unsigned s = 0; s < insn.srcCount(); ++s) {
-      Instruction::SrcRegister src = insn.getSrc(s);
-      if (src.getFile() == TGSI_FILE_TEMPORARY) {
-         if (src.isIndirect(0))
-            indirectTempArrays.insert(src.getArrayId());
-      } else
-      if (src.getFile() == TGSI_FILE_BUFFER ||
-          src.getFile() == TGSI_FILE_IMAGE ||
-          (src.getFile() == TGSI_FILE_MEMORY &&
-           memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
-         info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
-               0x1 : 0x2;
-      } else
-      if (src.getFile() == TGSI_FILE_OUTPUT) {
-         if (src.isIndirect(0)) {
-            // We don't know which one is accessed, just mark everything for
-            // reading. This is an extremely unlikely occurrence.
-            for (unsigned i = 0; i < info->numOutputs; ++i)
-               info->out[i].oread = 1;
-         } else {
-            info->out[src.getIndex(0)].oread = 1;
-         }
-      }
-      if (src.getFile() != TGSI_FILE_INPUT)
-         continue;
-      unsigned mask = insn.srcMask(s);
+   for (unsigned s = 0; s < insn.srcCount(); ++s)
+      scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
+
+   for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
+      scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());

-      if (src.isIndirect(0)) {
-         for (unsigned i = 0; i < info->numInputs; ++i)
-            info->in[i].mask = 0xf;
-      } else {
-         const int i = src.getIndex(0);
-         for (unsigned c = 0; c < 4; ++c) {
-            if (!(mask & (1 << c)))
-               continue;
-            int k = src.getSwizzle(c);
-            if (k <= TGSI_SWIZZLE_W)
-               info->in[i].mask |= 1 << k;
-         }
-         switch (info->in[i].sn) {
-         case TGSI_SEMANTIC_PSIZE:
-         case TGSI_SEMANTIC_PRIMID:
-         case TGSI_SEMANTIC_FOG:
-            info->in[i].mask &= 0x1;
-            break;
-         case TGSI_SEMANTIC_PCOORD:
-            info->in[i].mask &= 0x3;
-            break;
-         default:
-            break;
-         }
-      }
-   }
   return true;
 }

--- a/src/gallium/drivers/nouveau/nouveau_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_video.c
@@ -73,7 +73,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
   if (!dec->cmds)
      return;

-   nouveau_pushbuf_space(push, 8, 2, 0);
+   nouveau_pushbuf_space(push, 16, 2, 0);
   nouveau_bufctx_reset(dec->bufctx, NV31_VIDEO_BIND_CMD);

 #define BCTX_ARGS dec->bufctx, NV31_VIDEO_BIND_CMD, NOUVEAU_BO_RD
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -128,7 +128,7 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,

   refn.bo = mt->base.bo;
   refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
-   if (nouveau_pushbuf_space(push, 16, 1, 0) ||
+   if (nouveau_pushbuf_space(push, 32, 1, 0) ||
       nouveau_pushbuf_refn (push, &refn, 1))
      return;

--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -431,7 +431,7 @@ nv30_transfer_rect_sifm(XFER_ARGS)
      si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
   }

-   if (nouveau_pushbuf_space(push, 32, 6, 0) ||
+   if (nouveau_pushbuf_space(push, 64, 6, 0) ||
       nouveau_pushbuf_refn (push, refs, 2))
      return;

@@ -516,7 +516,7 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
   while (h) {
      unsigned lines = (h > 2047) ? 2047 : h;

-      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
+      if (nouveau_pushbuf_space(push, 32, 2, 0) ||
          nouveau_pushbuf_refn (push, refs, 2))
         return;

@@ -708,7 +708,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
      lines  = (pages > 2047) ? 2047 : pages;
      pages -= lines;

-      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
+      if (nouveau_pushbuf_space(push, 32, 2, 0) ||
          nouveau_pushbuf_refn (push, refs, 2))
         return;

@@ -732,7 +732,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
   }

   if (size) {
-      if (nouveau_pushbuf_space(push, 13, 2, 0) ||
+      if (nouveau_pushbuf_space(push, 32, 2, 0) ||
          nouveau_pushbuf_refn (push, refs, 2))
         return;

--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -307,6 +307,9 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
      const unsigned r = pso->output[i].register_index;
      b = pso->output[i].output_buffer;

+      if (r >= info->numOutputs)
+         continue;
+
      for (c = 0; c < pso->output[i].num_components; ++c)
         so->map[base[b] + p + c] = info->out[r].slot[s + c];
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -295,7 +295,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
   PUSH_DATAf(push, color->f[2]);
   PUSH_DATAf(push, color->f[3]);

-   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
      return;

   PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
@@ -394,7 +394,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
      mode |= NV50_3D_CLEAR_BUFFERS_S;
   }

-   if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
+   if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
      return;

   PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
@@ -752,7 +752,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
   PUSH_DATAf(push, color.f[2]);
   PUSH_DATAf(push, color.f[3]);

-   if (nouveau_pushbuf_space(push, 32, 1, 0))
+   if (nouveau_pushbuf_space(push, 64, 1, 0))
      return;

   PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
         BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
         PUSH_DATA (push, prim);

-         nouveau_pushbuf_space(push, 8, 0, 1);
+         nouveau_pushbuf_space(push, 16, 0, 1);
         PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);

         switch (index_size) {
--- a/src/gallium/drivers/nouveau/nv50/nv98_video.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c
@@ -273,7 +273,7 @@ nv98_create_decoder(struct pipe_context *context,
   dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));

   /* So lets test if the fence is working? */
-   nouveau_pushbuf_space(push[0], 6, 1, 0);
+   nouveau_pushbuf_space(push[0], 16, 1, 0);
   PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
   BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
   PUSH_DATAh(push[0], dec->fence_bo->offset);
@@ -284,7 +284,7 @@ nv98_create_decoder(struct pipe_context *context,
   PUSH_DATA (push[0], 0);
   PUSH_KICK (push[0]);

-   nouveau_pushbuf_space(push[1], 6, 1, 0);
+   nouveau_pushbuf_space(push[1], 16, 1, 0);
   PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
   BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
   PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
@@ -295,7 +295,7 @@ nv98_create_decoder(struct pipe_context *context,
   PUSH_DATA (push[1], 0);
   PUSH_KICK (push[1]);

-   nouveau_pushbuf_space(push[2], 6, 1, 0);
+   nouveau_pushbuf_space(push[2], 16, 1, 0);
   PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
   BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
   PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -47,7 +47,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   int ret;
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
-   unsigned fence_extra = 0;
   struct nouveau_pushbuf_refn bo_refs[] = {
      { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
      { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
@@ -61,10 +60,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   if (!dec->bitplane_bo)
      num_refs--;

-#if NOUVEAU_VP3_DEBUG_FENCE
-   fence_extra = 4;
-#endif
-
   bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
   for (i = 0; i < num_buffers; i++)
      bsp_size += num_bytes[i];
@@ -112,7 +107,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,

   nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);

-   nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
+   nouveau_pushbuf_space(push, 32, num_refs, 0);
   nouveau_pushbuf_refn(push, bo_refs, num_refs);

   bsp_addr = bsp_bo->offset >> 8;
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_ppp.c
@@ -93,13 +93,8 @@ nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
   enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
   struct nouveau_pushbuf *push = dec->pushbuf[2];
   unsigned ppp_caps = 0x10;
-   unsigned fence_extra = 0;

-#if NOUVEAU_VP3_DEBUG_FENCE
-   fence_extra = 4;
-#endif
-
-   nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+   nouveau_pushbuf_space(push, 32, 4, 0);

   switch (codec) {
   case PIPE_VIDEO_FORMAT_MPEG12: {
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
@@ -76,7 +76,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
-   u32 fence_extra = 0, codec_extra = 0;
+   u32 codec_extra = 0;
   struct nouveau_pushbuf_refn bo_refs[] = {
      { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
      { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
@@ -88,10 +88,6 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   };
   int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;

-#if NOUVEAU_VP3_DEBUG_FENCE
-   fence_extra = 4;
-#endif
-
   if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
      nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
      codec_extra += 2;
@@ -115,8 +111,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
      nv98_decoder_kick_ref(dec, target);

-   nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
-              6 + codec_extra + fence_extra + 2, num_refs, 0);
+   nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);

   nouveau_pushbuf_refn(push, bo_refs, num_refs);

--- a/Show More
+++ b/Show More