docs: add release notes for 10.6.4

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 10.6.4
2015-08-11 16:39:10 +01:00 · 2015-08-11 16:35:06 +01:00 · 2015-08-06 19:04:59 +01:00 · 2015-08-06 19:04:59 +01:00 · 2015-08-06 19:04:19 +01:00 · 2015-08-06 16:49:47 +01:00
89 changed files with 1409 additions and 1112 deletions
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-10.6.1
+10.6.4
--- a/bin/bugzilla_mesa.sh
+++ b/bin/bugzilla_mesa.sh
@@ -15,17 +15,14 @@
 # $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | wc -l


-# regex pattern: trim before url
-trim_before='s/.*\(http\)/\1/'
+# regex pattern: extract the bug number (trim everything before and after it)
+trim_before='s/.*show_bug.cgi?id=\([0-9]*\).*/\1/'

-# regex pattern: trim after url
-trim_after='s/\(show_bug.cgi?id=[0-9]*\).*/\1/'
-
-# regex pattern: always use https
-use_https='s/http:/https:/'
+# regex pattern: reconstruct the url
+use_after='s,^,https://bugs.freedesktop.org/show_bug.cgi?id=,'

 # extract fdo urls from commit log
-urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before -e $trim_after -e $use_https | sort | uniq)
+urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before | sort -n -u | sed -e $use_after)

 # if DRYRUN is set to "yes", simply print the URLs and don't fetch the
 # details from fdo bugzilla.
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,6 @@ dnl Copyright © 2009-2014 Jon TURNEY
 dnl Copyright © 2011-2012 Benjamin Franzke
 dnl Copyright © 2008-2014 David Airlie
 dnl Copyright © 2009-2013 Brian Paul
-dnl Copyright © 2003-2007 Keith Packard, Daniel Stone
 dnl
 dnl Permission is hereby granted, free of charge, to any person obtaining a
 dnl copy of this software and associated documentation files (the "Software"),
@@ -64,6 +63,8 @@ m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
 dnl Set internal versions
 OSMESA_VERSION=8
 AC_SUBST([OSMESA_VERSION])
+OPENCL_VERSION=1
+AC_SUBST([OPENCL_VERSION])

 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.38
@@ -910,6 +911,13 @@ fi
 AM_CONDITIONAL(HAVE_DRI_GLX, test "x$enable_glx" = xyes -a \
                                  "x$enable_dri" = xyes)

+# Check for libdrm
+PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
+                  [have_libdrm=yes], [have_libdrm=no])
+if test "x$have_libdrm" = xyes; then
+	DEFINES="$DEFINES -DHAVE_LIBDRM"
+fi
+
 # Select which platform-dependent DRI code gets built
 case "$host_os" in
 darwin*)
@@ -922,8 +930,8 @@ esac

 AM_CONDITIONAL(HAVE_DRICOMMON, test "x$enable_dri" = xyes )
 AM_CONDITIONAL(HAVE_DRISW, test "x$enable_dri" = xyes )
-AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm )
-AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm )
+AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
+AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
 AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = xyes -a "x$dri_platform" = xapple )

 AC_ARG_ENABLE([shared-glapi],
@@ -979,151 +987,6 @@ fi

 AC_SUBST([MESA_LLVM])

-# SHA1 hashing
-AC_ARG_WITH([sha1],
-        [AS_HELP_STRING([--with-sha1=libc|libmd|libnettle|libgcrypt|libcrypto|libsha1|CommonCrypto|CryptoAPI],
-        [choose SHA1 implementation])])
-case "x$with_sha1" in
-x | xlibc | xlibmd | xlibnettle | xlibgcrypt | xlibcrypto | xlibsha1 | xCommonCrypto | xCryptoAPI)
-  ;;
-*)
-        AC_MSG_ERROR([Illegal value for --with-sha1: $with_sha1])
-esac
-
-AC_CHECK_FUNC([SHA1Init], [HAVE_SHA1_IN_LIBC=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_LIBC" = xyes; then
-	with_sha1=libc
-fi
-if test "x$with_sha1" = xlibc && test "x$HAVE_SHA1_IN_LIBC" != xyes; then
-	AC_MSG_ERROR([sha1 in libc requested but not found])
-fi
-if test "x$with_sha1" = xlibc; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBC], [1],
-		[Use libc SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_FUNC([CC_SHA1_Init], [HAVE_SHA1_IN_COMMONCRYPTO=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_COMMONCRYPTO" = xyes; then
-	with_sha1=CommonCrypto
-fi
-if test "x$with_sha1" = xCommonCrypto && test "x$HAVE_SHA1_IN_COMMONCRYPTO" != xyes; then
-	AC_MSG_ERROR([CommonCrypto requested but not found])
-fi
-if test "x$with_sha1" = xCommonCrypto; then
-	AC_DEFINE([HAVE_SHA1_IN_COMMONCRYPTO], [1],
-		[Use CommonCrypto SHA1 functions])
-	SHA1_LIBS=""
-fi
-dnl stdcall functions cannot be tested with AC_CHECK_LIB
-AC_CHECK_HEADER([wincrypt.h], [HAVE_SHA1_IN_CRYPTOAPI=yes], [], [#include <windows.h>])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_CRYPTOAPI" = xyes; then
-	with_sha1=CryptoAPI
-fi
-if test "x$with_sha1" = xCryptoAPI && test "x$HAVE_SHA1_IN_CRYPTOAPI" != xyes; then
-	AC_MSG_ERROR([CryptoAPI requested but not found])
-fi
-if test "x$with_sha1" = xCryptoAPI; then
-	AC_DEFINE([HAVE_SHA1_IN_CRYPTOAPI], [1],
-		[Use CryptoAPI SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_LIB([md], [SHA1Init], [HAVE_LIBMD=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBMD" = xyes; then
-	with_sha1=libmd
-fi
-if test "x$with_sha1" = xlibmd && test "x$HAVE_LIBMD" != xyes; then
-	AC_MSG_ERROR([libmd requested but not found])
-fi
-if test "x$with_sha1" = xlibmd; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBMD], [1],
-	          [Use libmd SHA1 functions])
-	SHA1_LIBS=-lmd
-fi
-PKG_CHECK_MODULES([LIBSHA1], [libsha1], [HAVE_LIBSHA1=yes], [HAVE_LIBSHA1=no])
-if test "x$with_sha1" = x && test "x$HAVE_LIBSHA1" = xyes; then
-   with_sha1=libsha1
-fi
-if test "x$with_sha1" = xlibsha1 && test "x$HAVE_LIBSHA1" != xyes; then
-	AC_MSG_ERROR([libsha1 requested but not found])
-fi
-if test "x$with_sha1" = xlibsha1; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBSHA1], [1],
-	          [Use libsha1 for SHA1])
-	SHA1_LIBS=-lsha1
-fi
-AC_CHECK_LIB([nettle], [nettle_sha1_init], [HAVE_LIBNETTLE=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBNETTLE" = xyes; then
-	with_sha1=libnettle
-fi
-if test "x$with_sha1" = xlibnettle && test "x$HAVE_LIBNETTLE" != xyes; then
-	AC_MSG_ERROR([libnettle requested but not found])
-fi
-if test "x$with_sha1" = xlibnettle; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBNETTLE], [1],
-	          [Use libnettle SHA1 functions])
-	SHA1_LIBS=-lnettle
-fi
-AC_CHECK_LIB([gcrypt], [gcry_md_open], [HAVE_LIBGCRYPT=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBGCRYPT" = xyes; then
-	with_sha1=libgcrypt
-fi
-if test "x$with_sha1" = xlibgcrypt && test "x$HAVE_LIBGCRYPT" != xyes; then
-	AC_MSG_ERROR([libgcrypt requested but not found])
-fi
-if test "x$with_sha1" = xlibgcrypt; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBGCRYPT], [1],
-	          [Use libgcrypt SHA1 functions])
-	SHA1_LIBS=-lgcrypt
-fi
-# We don't need all of the OpenSSL libraries, just libcrypto
-AC_CHECK_LIB([crypto], [SHA1_Init], [HAVE_LIBCRYPTO=yes])
-PKG_CHECK_MODULES([OPENSSL], [openssl], [HAVE_OPENSSL_PKC=yes],
-                  [HAVE_OPENSSL_PKC=no])
-if test "x$HAVE_LIBCRYPTO" = xyes || test "x$HAVE_OPENSSL_PKC" = xyes; then
-	if test "x$with_sha1" = x; then
-		with_sha1=libcrypto
-	fi
-else
-	if test "x$with_sha1" = xlibcrypto; then
-		AC_MSG_ERROR([OpenSSL libcrypto requested but not found])
-	fi
-fi
-if test "x$with_sha1" = xlibcrypto; then
-	if test "x$HAVE_LIBCRYPTO" = xyes; then
-		SHA1_LIBS=-lcrypto
-	else
-		SHA1_LIBS="$OPENSSL_LIBS"
-		SHA1_CFLAGS="$OPENSSL_CFLAGS"
-	fi
-fi
-AC_MSG_CHECKING([for SHA1 implementation])
-AC_MSG_RESULT([$with_sha1])
-AC_SUBST(SHA1_LIBS)
-AC_SUBST(SHA1_CFLAGS)
-
-# Allow user to configure out the shader-cache feature
-AC_ARG_ENABLE([shader-cache],
-    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
-    [enable_shader_cache="$enableval"],
-    [if test "x$with_sha1" != "x"; then
-        enable_shader_cache=yes
-     else
-        enable_shader_cache=no
-     fi])
-if test "x$with_sha1" = "x"; then
-    if test "x$enable_shader_cache" = "xyes"; then
-        AC_MSG_ERROR([Cannot enable shader cache (no SHA-1 implementation found)])
-    fi
-fi
-AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
-
-# Check for libdrm
-PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
-                  [have_libdrm=yes], [have_libdrm=no])
-if test "x$have_libdrm" = xyes; then
-	DEFINES="$DEFINES -DHAVE_LIBDRM"
-fi
-
 case "$host_os" in
 linux*)
    need_pci_id=yes ;;
@@ -1766,6 +1629,9 @@ egl_platforms=`IFS=', '; echo $with_egl_platforms`
 for plat in $egl_platforms; do
 	case "$plat" in
 	wayland)
+		test "x$have_libdrm" != xyes &&
+			AC_MSG_ERROR([EGL platform wayland requires libdrm >= $LIBDRM_REQUIRED])
+
 		PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])

 		if test "x$WAYLAND_SCANNER" = x; then
@@ -2377,6 +2243,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/targets/libgl-xlib/Makefile
 		src/gallium/targets/omx/Makefile
 		src/gallium/targets/opencl/Makefile
+		src/gallium/targets/opencl/mesa.icd
 		src/gallium/targets/osmesa/Makefile
 		src/gallium/targets/osmesa/osmesa.pc
 		src/gallium/targets/pipe-loader/Makefile
@@ -2512,12 +2379,6 @@ else
    echo "        Gallium:         no"
 fi

-dnl Shader cache
-echo ""
-echo "        Shader cache:    $enable_shader_cache"
-if test "x$enable_shader_cache" = "xyes"; then
-    echo "        With SHA1 from:  $with_sha1"
-fi

 dnl Libraries
 echo ""
--- a/docs/relnotes/10.6.1.html
+++ b/docs/relnotes/10.6.1.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+b4cccd4d0eabcc2bca00c3175d3ad88fdda57ffdb883a7998525b873a21fe607  mesa-10.6.1.tar.gz
+6c80a2b647e57c85dc36e609d9aed17f878f0d8e0cf9ace86d14cf604101e1eb  mesa-10.6.1.tar.xz
 </pre>


--- a/docs/relnotes/10.6.2.html
+++ b/docs/relnotes/10.6.2.html
@@ -0,0 +1,165 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.2 Release Notes / July 11, 2015</h1>
+
+<p>
+Mesa 10.6.2 is a bug fix release which fixes bugs found since the 10.6.1 release.
+</p>
+<p>
+Mesa 10.6.2 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+9c7ab9300dda6c912faaaff97995ec1820ba21d114d9cf555f145cbad90995f4  mesa-10.6.2.tar.gz
+05753d3db4212900927b9894221a1669a10f56786e86a7e818b6e18a0817dca9  mesa-10.6.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>loader: Look for any version of currently linked libudev.so</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: Add sha256 checksums for the 10.6.1 release</li>
+  <li>Update version to 10.6.2</li>
+</ul>
+
+<p>Ilia Mirkin (8):</p>
+<ul>
+  <li>nv50/ir: propagate modifier to right arg when const-folding mad</li>
+  <li>nv50/ir: fix emission of address reg in 3rd source</li>
+  <li>nv50/ir: copy joinAt when splitting both before and after</li>
+  <li>mesa: reset the source packing when creating temp transfer image</li>
+  <li>nv50/ir: don't emit src2 in immediate form</li>
+  <li>mesa/prog: relative offsets into constbufs are not constant</li>
+  <li>nv50/ir: UCMP arguments are float, so make sure modifiers are applied</li>
+  <li>nvc0: turn sample counts off during blit</li>
+</ul>
+
+<p>Kenneth Graunke (5):</p>
+<ul>
+  <li>i965/fs: Fix ir_txs in emit_texture_gen4_simd16().</li>
+  <li>i965: Reserve more batch space to accomodate Gen6 perfmonitors.</li>
+  <li>i965/vs: Fix matNxM vertex attributes where M != 4.</li>
+  <li>Revert "glsl: clone inputs and outputs during linking"</li>
+  <li>Revert "i965: Delete linked GLSL IR when using NIR."</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>r600g: disable single-sample fast color clear due to hangs</li>
+  <li>radeonsi: fix a hang with DrawTransformFeedback on 4 SE chips</li>
+  <li>st/dri: don't set PIPE_BIND_SCANOUT for MSAA surfaces</li>
+</ul>
+
+<p>Mario Kleiner (2):</p>
+<ul>
+  <li>nouveau: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.</li>
+  <li>winsys/radeon: Use dup fd as key in drm-winsys hash table to fix ZaphodHeads.</li>
+</ul>
+
+<p>Matt Turner (2):</p>
+<ul>
+  <li>i965/fs: Don't mess up stride for uniform integer multiplication.</li>
+  <li>Revert SHA1 additions.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>winsys/radeon: Unmap GPU VM address range when destroying BO</li>
+</ul>
+
+<p>Mike Stroyan (2):</p>
+<ul>
+  <li>meta: Only change and restore viewport 0 in mesa meta mode</li>
+  <li>i965: allocate at least 1 BLEND_STATE element</li>
+</ul>
+
+<p>Neil Roberts (4):</p>
+<ul>
+  <li>i965/skl: Set the pulls bary bit in 3DSTATE_PS_EXTRA</li>
+  <li>glsl: Add missing check for whether an expression is an add operation</li>
+  <li>glsl: Make sure not to dereference NULL</li>
+  <li>i965: Don't try to print the GLSL IR if it has been freed</li>
+</ul>
+
+<p>Tapani Pälli (8):</p>
+<ul>
+  <li>glsl: clone inputs and outputs during linking</li>
+  <li>i965: Delete linked GLSL IR when using NIR.</li>
+  <li>glsl: Allow dynamic sampler array indexing with GLSL ES &lt; 3.00</li>
+  <li>mesa/glsl: new compiler option EmitNoIndirectSampler</li>
+  <li>i965: use EmitNoIndirectSampler for gen &lt; 7</li>
+  <li>i915: use EmitNoIndirectSampler</li>
+  <li>mesa/st: use EmitNoIndirectSampler if !ARB_gpu_shader5</li>
+  <li>glsl: validate sampler array indexing for 'constant-index-expression'</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.3.html
+++ b/docs/relnotes/10.6.3.html
@@ -0,0 +1,106 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.3 Release Notes / July 26, 2015</h1>
+
+<p>
+Mesa 10.6.3 is a bug fix release which fixes bugs found since the 10.6.2 release.
+</p>
+<p>
+Mesa 10.6.3 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c27e1e33798e69a6d2d2425aee8ac7b4c0b243066a65dd76cbb182ea31b1c7f2  mesa-10.6.3.tar.gz
+58592e07c350cd2e8969b73fa83048c657a39fe2f13f3b88f5e5818fe2e4676d  mesa-10.6.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>osmesa: fix OSMesaPixelsStore typo</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>mesa: Fix generation of git_sha1.h.tmp for gitlinks</li>
+</ul>
+
+<p>Christian König (2):</p>
+<ul>
+  <li>vl: cleanup video buffer private when the decoder is destroyed</li>
+  <li>st/vdpau: fix mixer size checks</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: Add sha256 checksums for the 10.6.2 release</li>
+  <li>auxiliary/vl: use the correct screen index</li>
+  <li>Update version to 10.6.3</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>i965/gen9: Use custom MOCS entries set up by the kernel.</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>nv50, nvc0: enable at least one color RT if alphatest is enabled</li>
+  <li>nvc0/ir: fix txq on indirect samplers</li>
+  <li>nvc0/ir: don't worry about sampler in txq handling</li>
+  <li>gm107/ir: fix indirect txq emission</li>
+  <li>nv50: fix max level clamping on G80</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>program: Allow redundant OPTION ARB_fog_* directives.</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>xa: don't leak fences</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.4.html
+++ b/docs/relnotes/10.6.4.html
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.4 Release Notes / August 11, 2015</h1>
+
+<p>
+Mesa 10.6.4 is a bug fix release which fixes bugs found since the 10.6.3 release.
+</p>
+<p>
+Mesa 10.6.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Anuj Phogat (6):</p>
+<ul>
+  <li>mesa: Turn get_readpixels_transfer_ops() in to a global function</li>
+  <li>meta: Fix transfer operations check in meta pbo path for readpixels</li>
+  <li>meta: Abort meta pbo path if readpixels need signed-unsigned conversion</li>
+  <li>meta: Don't do fragment color clamping in _mesa_meta_pbo_GetTexSubImage</li>
+  <li>mesa: Add a helper function _mesa_need_luminance_to_rgb_conversion()</li>
+  <li>meta: Fix reading luminance texture as rgba in _mesa_meta_pbo_GetTexSubImage()</li>
+</ul>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965/skl: Add production thread counts and URB size</li>
+</ul>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+  <li>mesa: Fix errors values returned by glShaderBinary()</li>
+  <li>mesa: Validate target before resolving tex obj in glTex(ture)SubImageXD</li>
+  <li>mesa: Fix error returned by glCopyTexImage2D() upon an invalid internal format</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: Add checksums for mesa 10.6.3 tarballs</li>
+  <li>configure.ac: do not set HAVE_DRI(23) when libdrm is missing</li>
+  <li>egl/wayland: libdrm is a hard requirement, treat it as such</li>
+  <li>winsys/radeon: don't leak the fd when it is 0</li>
+  <li>bugzilla_mesa.sh: sort the bugs list by number</li>
+  <li>Update version to 10.6.4</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>i965/fs: Fix fs_inst::regs_read() for sources in the ATTR file.</li>
+</ul>
+
+<p>Frank Binns (2):</p>
+<ul>
+  <li>egl/dri: Add error info needed for EGL_EXT_image_dma_buf_import extension</li>
+  <li>egl: Add eglQuerySurface surface type check for EGL_LARGEST_PBUFFER attrib</li>
+</ul>
+
+<p>Igor Gnatenko (1):</p>
+<ul>
+  <li>opencl: use versioned .so in mesa.icd</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0: fix geometry program revalidation of clipping params</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Fix a bug where LHS swizzles of swizzles were too small.</li>
+</ul>
+
+<p>Marek Olšák (6):</p>
+<ul>
+  <li>st/mesa: don't call st_validate_state in BlitFramebuffer</li>
+  <li>radeonsi: upload shader rodata after updating scratch relocations</li>
+  <li>st/mesa: don't ignore texture buffer state changes</li>
+  <li>radeonsi: rework how shader pointers to descriptors are set</li>
+  <li>radeonsi: completely rework updating descriptors without CP DMA</li>
+  <li>r600g: fix the CB_SHADER_MASK setup</li>
+</ul>
+
+<p>Samuel Iglesias Gonsalvez (1):</p>
+<ul>
+  <li>glsl/glcpp: fix SIGSEGV when checking error condition for macro redefinition</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>nv50: avoid segfault with enabled but unbound vertex attrib</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -1180,7 +1180,8 @@ enum __DRIChromaSiting {
 };

 /**
- * \name Reasons that __DRIimageExtensionRec::createImageFromTexture might fail
+ * \name Reasons that __DRIimageExtensionRec::createImageFromTexture or
+ * __DRIimageExtensionRec::createImageFromDmaBufs might fail
 */
 /*@{*/
 /** Success! */
@@ -1189,11 +1190,14 @@ enum __DRIChromaSiting {
 /** Memory allocation failure */
 #define __DRI_IMAGE_ERROR_BAD_ALLOC     1

-/** Client requested an invalid attribute for a texture object  */
+/** Client requested an invalid attribute */
 #define __DRI_IMAGE_ERROR_BAD_MATCH     2

 /** Client requested an invalid texture object */
 #define __DRI_IMAGE_ERROR_BAD_PARAMETER 3
+
+/** Client requested an invalid pitch and/or offset */
+#define __DRI_IMAGE_ERROR_BAD_ACCESS    4
 /*@}*/

 /**
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -1510,6 +1510,10 @@ dri2_create_image_khr_texture_error(int dri_error)
      egl_error = EGL_BAD_PARAMETER;
      break;

+   case __DRI_IMAGE_ERROR_BAD_ACCESS:
+      egl_error = EGL_BAD_ACCESS;
+      break;
+
   default:
      assert(0);
      egl_error = EGL_BAD_MATCH;
@@ -2123,13 +2127,11 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp,
   wl_drm_callbacks.authenticate =
      (int(*)(void *, uint32_t)) dri2_dpy->vtbl->authenticate;

-#ifdef HAVE_LIBDRM
   if (drmGetCap(dri2_dpy->fd, DRM_CAP_PRIME, &cap) == 0 &&
       cap == (DRM_PRIME_CAP_IMPORT | DRM_PRIME_CAP_EXPORT) &&
       dri2_dpy->image->base.version >= 7 &&
       dri2_dpy->image->createImageFromFds != NULL)
      flags |= WAYLAND_DRM_PRIME;
-#endif

   dri2_dpy->wl_server_drm =
 	   wayland_drm_init(wl_dpy, dri2_dpy->device_name,
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -309,7 +309,8 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
      *value = surface->Config->ConfigID;
      break;
   case EGL_LARGEST_PBUFFER:
-      *value = surface->LargestPbuffer;
+      if (surface->Type == EGL_PBUFFER_BIT)
+         *value = surface->LargestPbuffer;
      break;
   case EGL_TEXTURE_FORMAT:
      /* texture attributes: only for pbuffers, no error otherwise */
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -84,6 +84,9 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {

 struct video_buffer_private
 {
+   struct list_head list;
+   struct pipe_video_buffer *video_buffer;
+
   struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS];
   struct pipe_surface      *surfaces[VL_MAX_SURFACES];

@@ -99,6 +102,8 @@ destroy_video_buffer_private(void *private)
   struct video_buffer_private *priv = private;
   unsigned i;

+   list_del(&priv->list);
+
   for (i = 0; i < VL_NUM_COMPONENTS; ++i)
      pipe_sampler_view_reference(&priv->sampler_view_planes[i], NULL);

@@ -126,6 +131,9 @@ get_video_buffer_private(struct vl_mpeg12_decoder *dec, struct pipe_video_buffer

   priv = CALLOC_STRUCT(video_buffer_private);

+   list_add(&priv->list, &dec->buffer_privates);
+   priv->video_buffer = buf;
+
   sv = buf->get_sampler_view_planes(buf);
   for (i = 0; i < VL_NUM_COMPONENTS; ++i)
      if (sv[i])
@@ -141,6 +149,18 @@ get_video_buffer_private(struct vl_mpeg12_decoder *dec, struct pipe_video_buffer
   return priv;
 }

+static void
+free_video_buffer_privates(struct vl_mpeg12_decoder *dec)
+{
+   struct video_buffer_private *priv, *next;
+
+   LIST_FOR_EACH_ENTRY_SAFE(priv, next, &dec->buffer_privates, list) {
+      struct pipe_video_buffer *buf = priv->video_buffer;
+
+      vl_video_buffer_set_associated_data(buf, &dec->base, NULL, NULL);
+   }
+}
+
 static bool
 init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
@@ -464,6 +484,8 @@ vl_mpeg12_destroy(struct pipe_video_codec *decoder)

   assert(decoder);

+   free_video_buffer_privates(dec);
+
   /* Asserted in softpipe_delete_fs_state() for some reason */
   dec->context->bind_vs_state(dec->context, NULL);
   dec->context->bind_fs_state(dec->context, NULL);
@@ -1187,6 +1209,8 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
   if (!init_pipe_state(dec))
      goto error_pipe_state;

+   list_inithead(&dec->buffer_privates);
+
   return &dec->base;

 error_pipe_state:
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -30,6 +30,8 @@

 #include "pipe/p_video_codec.h"

+#include "util/list.h"
+
 #include "vl_mpeg12_bitstream.h"
 #include "vl_zscan.h"
 #include "vl_idct.h"
@@ -77,6 +79,8 @@ struct vl_mpeg12_decoder

   unsigned current_buffer;
   struct vl_mpeg12_buffer *dec_buffers[4];
+
+   struct list_head buffer_privates;
 };

 struct vl_mpeg12_buffer
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -293,6 +293,16 @@ vl_screen_get_private(struct vl_screen *vscreen)
   return vscreen;
 }

+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+    for (; iter.rem; --screen, xcb_screen_next(&iter))
+        if (screen == 0)
+            return iter.data;
+
+    return NULL;
+}
+
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
@@ -334,8 +344,7 @@ vl_screen_create(Display *display, int screen)
      goto free_query;

   s = xcb_setup_roots_iterator(xcb_get_setup(scrn->conn));
-   while (screen--)
-	xcb_screen_next(&s);
+
   driverType = XCB_DRI2_DRIVER_TYPE_DRI;
 #ifdef DRI2DriverPrimeShift
   {
@@ -351,7 +360,7 @@ vl_screen_create(Display *display, int screen)
   }
 #endif

-   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, s.data->root, driverType);
+   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType);
   connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL);
   if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0)
      goto free_connect;
@@ -370,7 +379,7 @@ vl_screen_create(Display *display, int screen)
   if (drmGetMagic(fd, &magic))
      goto free_connect;

-   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, s.data->root, magic);
+   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic);
   authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL);

   if (authenticate == NULL || !authenticate->authenticated)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp
@@ -332,6 +332,9 @@ BasicBlock::splitBefore(Instruction *insn, bool attach)
   BasicBlock *bb = new BasicBlock(func);
   assert(!insn || insn->op != OP_PHI);

+   bb->joinAt = joinAt;
+   joinAt = NULL;
+
   splitCommon(insn, bb, attach);
   return bb;
 }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -2437,8 +2437,14 @@ CodeEmitterGM107::emitTXQ()
      break;
   }

-   emitInsn (0xdf4a0000);
-   emitField(0x24, 13, insn->tex.r);
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0xdf500000);
+   } else {
+      emitInsn (0xdf480000);
+      emitField(0x24, 13, insn->tex.r);
+   }
+
+   emitField(0x31, 1, insn->tex.liveOnly);
   emitField(0x1f, 4, insn->tex.mask);
   emitField(0x16, 6, type);
   emitGPR  (0x08, insn->src(0));
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -499,10 +499,14 @@ CodeEmitterNV50::emitForm_MAD(const Instruction *i)
   setSrc(i, 2, 2);

   if (i->getIndirect(0, 0)) {
-      assert(!i->getIndirect(1, 0));
+      assert(!i->srcExists(1) || !i->getIndirect(1, 0));
+      assert(!i->srcExists(2) || !i->getIndirect(2, 0));
      setAReg16(i, 0);
-   } else {
+   } else if (i->srcExists(1) && i->getIndirect(1, 0)) {
+      assert(!i->srcExists(2) || !i->getIndirect(2, 0));
      setAReg16(i, 1);
+   } else {
+      setAReg16(i, 2);
   }
 }

@@ -546,7 +550,7 @@ CodeEmitterNV50::emitForm_MUL(const Instruction *i)
 }

 // usual immediate form
-// - 1 to 3 sources where last is immediate (rir, gir)
+// - 1 to 3 sources where second is immediate (rir, gir)
 // - no address or predicate possible
 void
 CodeEmitterNV50::emitForm_IMM(const Instruction *i)
@@ -562,7 +566,7 @@ CodeEmitterNV50::emitForm_IMM(const Instruction *i)
   if (Target::operationSrcNr[i->op] > 1) {
      setSrc(i, 0, 0);
      setImmediate(i, 1);
-      setSrc(i, 2, 1);
+      // If there is another source, it has to be the same as the dest reg.
   } else {
      setImmediate(i, 0);
   }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -434,7 +434,6 @@ nv50_ir::DataType Instruction::inferSrcType() const
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
-   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
@@ -1698,6 +1697,7 @@ Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
   join->fixed = 1;
   conv->insertHead(join);

+   assert(!fork->joinAt);
   fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv);
   fork->insertBefore(fork->getExit(), fork->joinAt);
 }
@@ -1739,7 +1739,7 @@ Converter::handleTXQ(Value *dst0[4], enum TexQuery query)
   }
   tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level

-   setTexRS(tex, c, 1, -1);
+   setTexRS(tex, ++c, 1, -1);

   bb->insertTail(tex);
 }
@@ -2580,6 +2580,8 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
      }
      break;
   case TGSI_OPCODE_UCMP:
+      srcTy = TYPE_U32;
+      /* fallthrough */
   case TGSI_OPCODE_CMP:
      FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
         src0 = fetchSrc(0, c);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -871,6 +871,7 @@ NV50LoweringPreSSA::handleTXL(TexInstruction *i)
   BasicBlock *joinBB = i->bb->splitAfter(i);

   bld.setPosition(currBB, true);
+   assert(!currBB->joinAt);
   currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL);

   for (int l = 0; l <= 3; ++l) {
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -956,7 +956,47 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
 bool
 NVC0LoweringPass::handleTXQ(TexInstruction *txq)
 {
-   // TODO: indirect resource/sampler index
+   if (txq->tex.rIndirectSrc < 0)
+      return true;
+
+   Value *ticRel = txq->getIndirectR();
+   const int chipset = prog->getTarget()->getChipset();
+
+   txq->setIndirectS(NULL);
+   txq->tex.sIndirectSrc = -1;
+
+   assert(ticRel);
+
+   if (chipset < NVISA_GK104_CHIPSET) {
+      LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
+
+      txq->setSrc(txq->tex.rIndirectSrc, NULL);
+      if (txq->tex.r)
+         ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+                             ticRel, bld.mkImm(txq->tex.r));
+
+      bld.mkOp2(OP_SHL, TYPE_U32, src, ticRel, bld.mkImm(0x17));
+
+      txq->moveSources(0, 1);
+      txq->setSrc(0, src);
+   } else {
+      Value *hnd = loadTexHandle(
+            bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                       txq->getIndirectR(), bld.mkImm(2)),
+            txq->tex.r);
+      txq->tex.r = 0xff;
+      txq->tex.s = 0x1f;
+
+      if (chipset < NVISA_GM107_CHIPSET) {
+         txq->setIndirectR(NULL);
+         txq->moveSources(0, 1);
+         txq->setSrc(0, hnd);
+         txq->tex.rIndirectSrc = 0;
+      } else {
+         txq->setIndirectR(hnd);
+      }
+   }
+
   return true;
 }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -601,9 +601,12 @@ ConstantFolding::expr(Instruction *i,
   case OP_FMA: {
      i->op = OP_ADD;

+      /* Move the immediate to the second arg, otherwise the ADD operation
+       * won't be emittable
+       */
      i->setSrc(1, i->getSrc(0));
-      i->src(1).mod = i->src(2).mod;
      i->setSrc(0, i->getSrc(2));
+      i->src(0).mod = i->src(2).mod;
      i->setSrc(2, NULL);

      ImmediateValue src0;
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -296,6 +296,23 @@ nv50_check_program_ucps(struct nv50_context *nv50,
   nv50_fp_linkage_validate(nv50);
 }

+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nv50_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nv50_validate_derived_2(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+
+   if (nv50->zsa && nv50->zsa->pipe.alpha.enabled &&
+       nv50->framebuffer.nr_cbufs == 0) {
+      nv50_fb_set_null_rt(push, 0);
+      BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
+      PUSH_DATA (push, (076543210 << 4) | 1);
+   }
+}
+
 static void
 nv50_validate_clip(struct nv50_context *nv50)
 {
@@ -456,6 +473,7 @@ static struct state_validate {
    { nv50_gp_linkage_validate,    NV50_NEW_GMTYPROG | NV50_NEW_VERTPROG },
    { nv50_validate_derived_rs,    NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
+    { nv50_validate_derived_2,     NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
    { nv50_validate_clip,          NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
    { nv50_constbufs_validate,     NV50_NEW_CONSTBUF },
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -71,6 +71,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
                         uint32_t flags,
                         enum pipe_texture_target target)
 {
+   const uint32_t class_3d = nouveau_context(pipe)->screen->class_3d;
   const struct util_format_description *desc;
   uint64_t addr;
   uint32_t *tic;
@@ -201,11 +202,17 @@ nv50_create_texture_view(struct pipe_context *pipe,

   tic[5] = (mt->base.base.height0 << mt->ms_y) & 0xffff;
   tic[5] |= depth << 16;
-   tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+   if (class_3d > NV50_3D_CLASS)
+      tic[5] |= mt->base.base.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;
+   else
+      tic[5] |= view->pipe.u.tex.last_level << NV50_TIC_5_LAST_LEVEL__SHIFT;

   tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000; /* sampling points */

-   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+   if (class_3d > NV50_3D_CLASS)
+      tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+   else
+      tic[7] = 0;

   if (unlikely(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS)))
      if (mt->base.base.last_level)
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -382,6 +382,11 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
      if (nv50->vbo_user & (1 << b)) {
         address = addrs[b] + ve->pipe.src_offset;
         limit = addrs[b] + limits[b];
+      } else
+      if (!vb->buffer) {
+         BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FETCH(i)), 1);
+         PUSH_DATA (push, 0);
+         continue;
      } else {
         struct nv04_resource *buf = nv04_resource(vb->buffer);
         if (!(refd & (1 << b))) {
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
      nvc0_vertprog_validate(nvc0);
   else
   if (likely(vp == nvc0->gmtyprog))
-      nvc0_vertprog_validate(nvc0);
+      nvc0_gmtyprog_validate(nvc0);
   else
      nvc0_tevlprog_validate(nvc0);
 }
@@ -535,6 +535,23 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
   }
 }

+/* alpha test is disabled if there are no color RTs, so make sure we have at
+ * least one if alpha test is enabled. Note that this must run after
+ * nvc0_validate_fb, otherwise that will override the RT count setting.
+ */
+static void
+nvc0_validate_derived_2(struct nvc0_context *nvc0)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+   if (nvc0->zsa && nvc0->zsa->pipe.alpha.enabled &&
+       nvc0->framebuffer.nr_cbufs == 0) {
+      nvc0_fb_set_null_rt(push, 0);
+      BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+      PUSH_DATA (push, (076543210 << 4) | 1);
+   }
+}
+
 static void
 nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
 {
@@ -597,6 +614,7 @@ static struct state_validate {
    { nvc0_fragprog_validate,      NVC0_NEW_FRAGPROG },
    { nvc0_validate_derived_1,     NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
                                   NVC0_NEW_RASTERIZER },
+    { nvc0_validate_derived_2,     NVC0_NEW_ZSA | NVC0_NEW_FRAMEBUFFER },
    { nvc0_validate_clip,          NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
                                   NVC0_NEW_VERTPROG |
                                   NVC0_NEW_TEVLPROG |
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1376,6 +1376,7 @@ static void
 nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
 {
   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   boolean eng3d = FALSE;

   if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
@@ -1439,11 +1440,17 @@ nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
        info->src.box.height != -info->dst.box.height))
      eng3d = TRUE;

+   if (nvc0->screen->num_occlusion_queries_active)
+      IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
+
   if (!eng3d)
      nvc0_blit_eng2d(nvc0, info);
   else
      nvc0_blit_3d(nvc0, info);

+   if (nvc0->screen->num_occlusion_queries_active)
+      IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+
   NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
 }

--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1732,10 +1732,10 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_

 	r600_write_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
 	radeon_emit(cs, a->blend_colormask & fb_colormask); /* R_028238_CB_TARGET_MASK */
-	/* Always enable the first colorbuffer in CB_SHADER_MASK. This
-	 * will assure that the alpha-test will work even if there is
-	 * no colorbuffer bound. */
-	radeon_emit(cs, 0xf | (a->dual_src_blend ? ps_colormask : 0) | fb_colormask); /* R_02823C_CB_SHADER_MASK */
+	/* This must match the used export instructions exactly.
+	 * Other values may lead to undefined behavior and hangs.
+	 */
+	radeon_emit(cs, ps_colormask); /* R_02823C_CB_SHADER_MASK */
 }

 static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom)
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -393,7 +393,12 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct pipe_framebuffer_state *fb = &rctx->framebuffer.state;

-	if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN) {
+	/* Single-sample fast color clear is broken on r600g:
+	 *   https://bugs.freedesktop.org/show_bug.cgi?id=73528
+	 *   https://bugs.freedesktop.org/show_bug.cgi?id=82186
+	 */
+	if (buffers & PIPE_CLEAR_COLOR && rctx->b.chip_class >= EVERGREEN &&
+	    rctx->framebuffer.nr_samples > 1) {
 		evergreen_do_fast_color_clear(&rctx->b, fb, &rctx->framebuffer.atom,
 					      &buffers, color);
 	}
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2482,6 +2482,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 			output[j].array_base = 0;
 			output[j].op = CF_OP_EXPORT;
 			j++;
+			shader->nr_ps_color_exports++;
 		}

 		noutput = j;
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -24,14 +24,23 @@
 *      Marek Olšák <marek.olsak@amd.com>
 */

-/* Resource binding slots and sampler states (each described with 8 or 4 dwords)
- * live in memory on SI.
+/* Resource binding slots and sampler states (each described with 8 or
+ * 4 dwords) are stored in lists in memory which is accessed by shaders
+ * using scalar load instructions.
 *
- * This file is responsible for managing lists of resources and sampler states
- * in memory and binding them, which means updating those structures in memory.
+ * This file is responsible for managing such lists. It keeps a copy of all
+ * descriptors in CPU memory and re-uploads a whole list if some slots have
+ * been changed.
 *
- * There is also code for updating shader pointers to resources and sampler
- * states. CP DMA functions are here too.
+ * This code is also responsible for updating shader pointers to those lists.
+ *
+ * Note that CP DMA can't be used for updating the lists, because a GPU hang
+ * could leave the list in a mid-IB state and the next IB would get wrong
+ * descriptors and the whole context would be unusable at that point.
+ * (Note: Register shadowing can't be used for the same reason.)
+ *
+ * Also, uploading descriptors to newly allocated memory doesn't require
+ * a KCACHE flush.
 */

 #include "radeon/r600_cs.h"
@@ -42,7 +51,6 @@
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"

-#define SI_NUM_CONTEXTS 16

 /* NULL image and buffer descriptor.
 *
@@ -139,209 +147,62 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
 	}
 }

-static void si_init_descriptors(struct si_context *sctx,
-				struct si_descriptors *desc,
-				unsigned shader_userdata_reg,
+static void si_init_descriptors(struct si_descriptors *desc,
+				unsigned shader_userdata_index,
 				unsigned element_dw_size,
-				unsigned num_elements,
-				void (*emit_func)(struct si_context *ctx, struct r600_atom *state))
+				unsigned num_elements)
 {
-	assert(num_elements <= sizeof(desc->enabled_mask)*8);
-	assert(num_elements <= sizeof(desc->dirty_mask)*8);
+	int i;

-	desc->atom.emit = (void*)emit_func;
-	desc->shader_userdata_reg = shader_userdata_reg;
+	assert(num_elements <= sizeof(desc->enabled_mask)*8);
+
+	desc->list = CALLOC(num_elements, element_dw_size * 4);
 	desc->element_dw_size = element_dw_size;
 	desc->num_elements = num_elements;
-	desc->context_size = num_elements * element_dw_size * 4;
+	desc->list_dirty = true; /* upload the list before the next draw */
+	desc->shader_userdata_offset = shader_userdata_index * 4;

-	desc->buffer = (struct r600_resource*)
-		pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
-				   PIPE_USAGE_DEFAULT,
-				   SI_NUM_CONTEXTS * desc->context_size);
-
-	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
-			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-
-	/* We don't check for CS space here, because this should be called
-	 * only once at context initialization. */
-	si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address,
-				    desc->buffer->b.b.width0, 0,
-				    R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
+	/* Initialize the array to NULL descriptors if the element size is 8. */
+	if (element_dw_size == 8)
+		for (i = 0; i < num_elements; i++)
+			memcpy(desc->list + i*element_dw_size, null_descriptor,
+			       sizeof(null_descriptor));
 }

 static void si_release_descriptors(struct si_descriptors *desc)
 {
 	pipe_resource_reference((struct pipe_resource**)&desc->buffer, NULL);
+	FREE(desc->list);
 }

-static void si_update_descriptors(struct si_context *sctx,
+static bool si_upload_descriptors(struct si_context *sctx,
 				  struct si_descriptors *desc)
 {
-	if (desc->dirty_mask) {
-		desc->atom.num_dw =
-			7 + /* copy */
-			(4 + desc->element_dw_size) * util_bitcount64(desc->dirty_mask) + /* update */
-			4; /* pointer update */
+	unsigned list_size = desc->num_elements * desc->element_dw_size * 4;
+	void *ptr;

-		if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
-		    desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0)
-			desc->atom.num_dw += 4; /* second pointer update */
+	if (!desc->list_dirty)
+		return true;

-		desc->atom.dirty = true;
+	u_upload_alloc(sctx->b.uploader, 0, list_size,
+		       &desc->buffer_offset,
+		       (struct pipe_resource**)&desc->buffer, &ptr);
+	if (!desc->buffer)
+		return false; /* skip the draw call */

-		/* TODO: Investigate if these flushes can be removed after
-		 * adding CE support. */
+	util_memcpy_cpu_to_le32(ptr, desc->list, list_size);

-		/* The descriptors are read with the K cache. */
-		sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
+	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer,
+			      RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA);

-		/* Since SI uses uncached CP DMA to update descriptors,
-		 * we have to flush TC L2, which is used to fetch constants
-		 * along with KCACHE. */
-		if (sctx->b.chip_class == SI)
-			sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
-	} else {
-		desc->atom.dirty = false;
-	}
-}
-
-static void si_emit_shader_pointer(struct si_context *sctx,
-				   struct r600_atom *atom)
-{
-	struct si_descriptors *desc = (struct si_descriptors*)atom;
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint64_t va = desc->buffer->gpu_address +
-		      desc->current_context_id * desc->context_size +
-		      desc->buffer_offset;
-
-	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
-	radeon_emit(cs, (desc->shader_userdata_reg - SI_SH_REG_OFFSET) >> 2);
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-
-	if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
-	    desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) {
-		radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
-		radeon_emit(cs, (desc->shader_userdata_reg +
-				 (R_00B330_SPI_SHADER_USER_DATA_ES_0 -
-				  R_00B130_SPI_SHADER_USER_DATA_VS_0) -
-				 SI_SH_REG_OFFSET) >> 2);
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
-	}
-}
-
-static void si_emit_descriptors(struct si_context *sctx,
-				struct si_descriptors *desc,
-				uint32_t **descriptors)
-{
-	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	uint64_t va_base;
-	int packet_start = 0;
-	int packet_size = 0;
-	int last_index = desc->num_elements; /* point to a non-existing element */
-	uint64_t dirty_mask = desc->dirty_mask;
-	unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS;
-
-	assert(dirty_mask);
-
-	va_base = desc->buffer->gpu_address;
-
-	/* Copy the descriptors to a new context slot. */
-	si_emit_cp_dma_copy_buffer(sctx,
-				   va_base + new_context_id * desc->context_size,
-				   va_base + desc->current_context_id * desc->context_size,
-				   desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2);
-
-	va_base += new_context_id * desc->context_size;
-
-	/* Update the descriptors.
-	 * Updates of consecutive descriptors are merged to one WRITE_DATA packet.
-	 *
-	 * XXX When unbinding lots of resources, consider clearing the memory
-	 *     with CP DMA instead of emitting zeros.
-	 */
-	while (dirty_mask) {
-		int i = u_bit_scan64(&dirty_mask);
-
-		assert(i < desc->num_elements);
-
-		if (last_index+1 == i && packet_size) {
-			/* Append new data at the end of the last packet. */
-			packet_size += desc->element_dw_size;
-			cs->buf[packet_start] = PKT3(PKT3_WRITE_DATA, packet_size, 0);
-		} else {
-			/* Start a new packet. */
-			uint64_t va = va_base + i * desc->element_dw_size * 4;
-
-			packet_start = cs->cdw;
-			packet_size = 2 + desc->element_dw_size;
-
-			radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0));
-			radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ?
-						PKT3_WRITE_DATA_DST_SEL_MEM_SYNC :
-						PKT3_WRITE_DATA_DST_SEL_TC_L2) |
-					     PKT3_WRITE_DATA_WR_CONFIRM |
-					     PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
-			radeon_emit(cs, va & 0xFFFFFFFFUL);
-			radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
-		}
-
-		radeon_emit_array(cs, descriptors[i], desc->element_dw_size);
-
-		last_index = i;
-	}
-
-	desc->dirty_mask = 0;
-	desc->current_context_id = new_context_id;
-
-	/* Now update the shader userdata pointer. */
-	si_emit_shader_pointer(sctx, &desc->atom);
-}
-
-static unsigned si_get_shader_user_data_base(unsigned shader)
-{
-	switch (shader) {
-	case PIPE_SHADER_VERTEX:
-		return R_00B130_SPI_SHADER_USER_DATA_VS_0;
-	case PIPE_SHADER_GEOMETRY:
-		return R_00B230_SPI_SHADER_USER_DATA_GS_0;
-	case PIPE_SHADER_FRAGMENT:
-		return R_00B030_SPI_SHADER_USER_DATA_PS_0;
-	default:
-		assert(0);
-		return 0;
-	}
+	desc->list_dirty = false;
+	desc->pointer_dirty = true;
+	sctx->shader_userdata.atom.dirty = true;
+	return true;
 }

 /* SAMPLER VIEWS */

-static void si_emit_sampler_views(struct si_context *sctx, struct r600_atom *atom)
-{
-	struct si_sampler_views *views = (struct si_sampler_views*)atom;
-
-	si_emit_descriptors(sctx, &views->desc, views->desc_data);
-}
-
-static void si_init_sampler_views(struct si_context *sctx,
-				  struct si_sampler_views *views,
-				  unsigned shader)
-{
-	int i;
-
-	si_init_descriptors(sctx, &views->desc,
-			    si_get_shader_user_data_base(shader) +
-			    SI_SGPR_RESOURCE * 4,
-			    8, SI_NUM_SAMPLER_VIEWS, si_emit_sampler_views);
-
-	for (i = 0; i < views->desc.num_elements; i++) {
-		views->desc_data[i] = null_descriptor;
-		views->desc.dirty_mask |= 1llu << i;
-	}
-	si_update_descriptors(sctx, &views->desc);
-}
-
 static void si_release_sampler_views(struct si_sampler_views *views)
 {
 	int i;
@@ -382,10 +243,10 @@ static void si_sampler_views_begin_new_cs(struct si_context *sctx,
 				      si_get_resource_ro_priority(rview->resource));
 	}

+	if (!views->desc.buffer)
+		return;
 	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, views->desc.buffer,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-
-	si_emit_shader_pointer(sctx, &views->desc.atom);
 }

 static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
@@ -406,17 +267,16 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
 				rview->resource, RADEON_USAGE_READ,
 				si_get_resource_ro_priority(rview->resource));

-
 		pipe_sampler_view_reference(&views->views[slot], view);
-		views->desc_data[slot] = view_desc;
+		memcpy(views->desc.list + slot*8, view_desc, 8*4);
 		views->desc.enabled_mask |= 1llu << slot;
 	} else {
 		pipe_sampler_view_reference(&views->views[slot], NULL);
-		views->desc_data[slot] = null_descriptor;
+		memcpy(views->desc.list + slot*8, null_descriptor, 8*4);
 		views->desc.enabled_mask &= ~(1llu << slot);
 	}

-	views->desc.dirty_mask |= 1llu << slot;
+	views->desc.list_dirty = true;
 }

 static void si_set_sampler_views(struct pipe_context *ctx,
@@ -475,25 +335,17 @@ static void si_set_sampler_views(struct pipe_context *ctx,
 					    NULL, NULL);
 		}
 	}
-
-	si_update_descriptors(sctx, &samplers->views.desc);
 }

 /* SAMPLER STATES */

-static void si_emit_sampler_states(struct si_context *sctx, struct r600_atom *atom)
-{
-	struct si_sampler_states *states = (struct si_sampler_states*)atom;
-
-	si_emit_descriptors(sctx, &states->desc, states->desc_data);
-}
-
 static void si_sampler_states_begin_new_cs(struct si_context *sctx,
 					   struct si_sampler_states *states)
 {
+	if (!states->desc.buffer)
+		return;
 	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, states->desc.buffer,
 			      RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA);
-	si_emit_shader_pointer(sctx, &states->desc.atom);
 }

 void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
@@ -513,66 +365,39 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
 	for (i = 0; i < count; i++) {
 		unsigned slot = start + i;

-		if (!sstates[i]) {
-			samplers->desc.dirty_mask &= ~(1llu << slot);
+		if (!sstates[i])
 			continue;
-		}

-		samplers->desc_data[slot] = sstates[i]->val;
-		samplers->desc.dirty_mask |= 1llu << slot;
+		memcpy(samplers->desc.list + slot*4, sstates[i]->val, 4*4);
+		samplers->desc.list_dirty = true;
 	}
-
-	si_update_descriptors(sctx, &samplers->desc);
 }

 /* BUFFER RESOURCES */

-static void si_emit_buffer_resources(struct si_context *sctx, struct r600_atom *atom)
-{
-	struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom;
-
-	si_emit_descriptors(sctx, &buffers->desc, buffers->desc_data);
-}
-
-static void si_init_buffer_resources(struct si_context *sctx,
-				     struct si_buffer_resources *buffers,
-				     unsigned num_buffers, unsigned shader,
+static void si_init_buffer_resources(struct si_buffer_resources *buffers,
+				     unsigned num_buffers,
 				     unsigned shader_userdata_index,
 				     enum radeon_bo_usage shader_usage,
 				     enum radeon_bo_priority priority)
 {
-	int i;
-
-	buffers->num_buffers = num_buffers;
 	buffers->shader_usage = shader_usage;
 	buffers->priority = priority;
 	buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
-	buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4);

-	/* si_emit_descriptors only accepts an array of arrays.
-	 * This adds such an array. */
-	buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*));
-	for (i = 0; i < num_buffers; i++) {
-		buffers->desc_data[i] = &buffers->desc_storage[i*4];
-	}
-
-	si_init_descriptors(sctx, &buffers->desc,
-			    si_get_shader_user_data_base(shader) +
-			    shader_userdata_index*4, 4, num_buffers,
-			    si_emit_buffer_resources);
+	si_init_descriptors(&buffers->desc, shader_userdata_index, 4,
+			    num_buffers);
 }

 static void si_release_buffer_resources(struct si_buffer_resources *buffers)
 {
 	int i;

-	for (i = 0; i < buffers->num_buffers; i++) {
+	for (i = 0; i < buffers->desc.num_elements; i++) {
 		pipe_resource_reference(&buffers->buffers[i], NULL);
 	}

 	FREE(buffers->buffers);
-	FREE(buffers->desc_storage);
-	FREE(buffers->desc_data);
 	si_release_descriptors(&buffers->desc);
 }

@@ -590,11 +415,11 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
 				      buffers->shader_usage, buffers->priority);
 	}

+	if (!buffers->desc.buffer)
+		return;
 	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
 			      buffers->desc.buffer, RADEON_USAGE_READWRITE,
 			      RADEON_PRIO_SHADER_DATA);
-
-	si_emit_shader_pointer(sctx, &buffers->desc.atom);
 }

 /* VERTEX BUFFERS */
@@ -617,14 +442,15 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
 				      (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
 				      RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
 	}
+
+	if (!desc->buffer)
+		return;
 	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_SHADER_DATA);
-
-	si_emit_shader_pointer(sctx, &desc->atom);
 }

-void si_update_vertex_buffers(struct si_context *sctx)
+static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
 {
 	struct si_descriptors *desc = &sctx->vertex_buffers;
 	bool bound[SI_NUM_VERTEX_BUFFERS] = {};
@@ -632,8 +458,10 @@ void si_update_vertex_buffers(struct si_context *sctx)
 	uint64_t va;
 	uint32_t *ptr;

+	if (!sctx->vertex_buffers_dirty)
+		return true;
 	if (!count || !sctx->vertex_elements)
-		return;
+		return true;

 	/* Vertex buffer descriptors are the only ones which are uploaded
 	 * directly through a staging buffer and don't go through
@@ -641,13 +469,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
 	 */
 	u_upload_alloc(sctx->b.uploader, 0, count * 16, &desc->buffer_offset,
 		       (struct pipe_resource**)&desc->buffer, (void**)&ptr);
+	if (!desc->buffer)
+		return false;

 	r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
 			      desc->buffer, RADEON_USAGE_READ,
 			      RADEON_PRIO_SHADER_DATA);

 	assert(count <= SI_NUM_VERTEX_BUFFERS);
-	assert(desc->current_context_id == 0);

 	for (i = 0; i < count; i++) {
 		struct pipe_vertex_element *ve = &sctx->vertex_elements->elements[i];
@@ -693,13 +522,14 @@ void si_update_vertex_buffers(struct si_context *sctx)
 		}
 	}

-	desc->atom.num_dw = 8; /* update 2 shader pointers (VS+ES) */
-	desc->atom.dirty = true;
-
 	/* Don't flush the const cache. It would have a very negative effect
 	 * on performance (confirmed by testing). New descriptors are always
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
+	desc->pointer_dirty = true;
+	sctx->shader_userdata.atom.dirty = true;
+	sctx->vertex_buffers_dirty = false;
+	return true;
 }


@@ -724,7 +554,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 	if (shader >= SI_NUM_SHADERS)
 		return;

-	assert(slot < buffers->num_buffers);
+	assert(slot < buffers->desc.num_elements);
 	pipe_resource_reference(&buffers->buffers[slot], NULL);

 	/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
@@ -751,7 +581,7 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 		}

 		/* Set the descriptor. */
-		uint32_t *desc = buffers->desc_data[slot];
+		uint32_t *desc = buffers->desc.list + slot*4;
 		desc[0] = va;
 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 			  S_008F04_STRIDE(0);
@@ -770,12 +600,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 		buffers->desc.enabled_mask |= 1llu << slot;
 	} else {
 		/* Clear the descriptor. */
-		memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+		memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
 		buffers->desc.enabled_mask &= ~(1llu << slot);
 	}

-	buffers->desc.dirty_mask |= 1llu << slot;
-	si_update_descriptors(sctx, &buffers->desc);
+	buffers->desc.list_dirty = true;
 }

 /* RING BUFFERS */
@@ -795,7 +624,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 	/* The stride field in the resource descriptor has 14 bits */
 	assert(stride < (1 << 14));

-	assert(slot < buffers->num_buffers);
+	assert(slot < buffers->desc.num_elements);
 	pipe_resource_reference(&buffers->buffers[slot], NULL);

 	if (buffer) {
@@ -840,7 +669,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 		}

 		/* Set the descriptor. */
-		uint32_t *desc = buffers->desc_data[slot];
+		uint32_t *desc = buffers->desc.list + slot*4;
 		desc[0] = va;
 		desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 			  S_008F04_STRIDE(stride) |
@@ -863,12 +692,11 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 		buffers->desc.enabled_mask |= 1llu << slot;
 	} else {
 		/* Clear the descriptor. */
-		memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
+		memset(buffers->desc.list + slot*4, 0, sizeof(uint32_t) * 4);
 		buffers->desc.enabled_mask &= ~(1llu << slot);
 	}

-	buffers->desc.dirty_mask |= 1llu << slot;
-	si_update_descriptors(sctx, &buffers->desc);
+	buffers->desc.list_dirty = true;
 }

 /* STREAMOUT BUFFERS */
@@ -930,7 +758,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 			uint64_t va = r600_resource(buffer)->gpu_address;

 			/* Set the descriptor. */
-			uint32_t *desc = buffers->desc_data[bufidx];
+			uint32_t *desc = buffers->desc.list + bufidx*4;
 			desc[0] = va;
 			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
 			desc[2] = 0xffffffff;
@@ -948,24 +776,22 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
 			buffers->desc.enabled_mask |= 1llu << bufidx;
 		} else {
 			/* Clear the descriptor and unset the resource. */
-			memset(buffers->desc_data[bufidx], 0,
+			memset(buffers->desc.list + bufidx*4, 0,
 			       sizeof(uint32_t) * 4);
 			pipe_resource_reference(&buffers->buffers[bufidx],
 						NULL);
 			buffers->desc.enabled_mask &= ~(1llu << bufidx);
 		}
-		buffers->desc.dirty_mask |= 1llu << bufidx;
 	}
 	for (; i < old_num_targets; i++) {
 		bufidx = SI_SO_BUF_OFFSET + i;
 		/* Clear the descriptor and unset the resource. */
-		memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
+		memset(buffers->desc.list + bufidx*4, 0, sizeof(uint32_t) * 4);
 		pipe_resource_reference(&buffers->buffers[bufidx], NULL);
 		buffers->desc.enabled_mask &= ~(1llu << bufidx);
-		buffers->desc.dirty_mask |= 1llu << bufidx;
 	}

-	si_update_descriptors(sctx, &buffers->desc);
+	buffers->desc.list_dirty = true;
 }

 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
@@ -1034,22 +860,19 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 	/* Read/Write buffers. */
 	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 		struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
-		bool found = false;
 		uint64_t mask = buffers->desc.enabled_mask;

 		while (mask) {
 			i = u_bit_scan64(&mask);
 			if (buffers->buffers[i] == buf) {
-				si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+				si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
 							    old_va, buf);
+				buffers->desc.list_dirty = true;

 				r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
 						      rbuffer, buffers->shader_usage,
 						      buffers->priority);

-				buffers->desc.dirty_mask |= 1llu << i;
-				found = true;
-
 				if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) {
 					/* Update the streamout state. */
 					if (sctx->b.streamout.begin_emitted) {
@@ -1061,34 +884,25 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 				}
 			}
 		}
-		if (found) {
-			si_update_descriptors(sctx, &buffers->desc);
-		}
 	}

 	/* Constant buffers. */
 	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 		struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
-		bool found = false;
 		uint64_t mask = buffers->desc.enabled_mask;

 		while (mask) {
 			unsigned i = u_bit_scan64(&mask);
 			if (buffers->buffers[i] == buf) {
-				si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
+				si_desc_reset_buffer_offset(ctx, buffers->desc.list + i*4,
 							    old_va, buf);
+				buffers->desc.list_dirty = true;

 				r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
 						      rbuffer, buffers->shader_usage,
 						      buffers->priority);
-
-				buffers->desc.dirty_mask |= 1llu << i;
-				found = true;
 			}
 		}
-		if (found) {
-			si_update_descriptors(sctx, &buffers->desc);
-		}
 	}

 	/* Texture buffers - update virtual addresses in sampler view descriptors. */
@@ -1100,23 +914,20 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 	/* Texture buffers - update bindings. */
 	for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 		struct si_sampler_views *views = &sctx->samplers[shader].views;
-		bool found = false;
 		uint64_t mask = views->desc.enabled_mask;

 		while (mask) {
 			unsigned i = u_bit_scan64(&mask);
 			if (views->views[i]->texture == buf) {
+				si_desc_reset_buffer_offset(ctx, views->desc.list + i*8+4,
+							    old_va, buf);
+				views->desc.list_dirty = true;
+
 				r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
 						      rbuffer, RADEON_USAGE_READ,
 						      RADEON_PRIO_SHADER_BUFFER_RO);
-
-				views->desc.dirty_mask |= 1llu << i;
-				found = true;
 			}
 		}
-		if (found) {
-			si_update_descriptors(sctx, &views->desc);
-		}
 	}
 }

@@ -1277,39 +1088,133 @@ void si_copy_buffer(struct si_context *sctx,
 		r600_resource(dst)->TC_L2_dirty = true;
 }

-/* INIT/DEINIT */
+/* SHADER USER DATA */
+
+static void si_mark_shader_pointers_dirty(struct si_context *sctx,
+					  unsigned shader)
+{
+	sctx->const_buffers[shader].desc.pointer_dirty = true;
+	sctx->rw_buffers[shader].desc.pointer_dirty = true;
+	sctx->samplers[shader].views.desc.pointer_dirty = true;
+	sctx->samplers[shader].states.desc.pointer_dirty = true;
+
+	if (shader == PIPE_SHADER_VERTEX)
+		sctx->vertex_buffers.pointer_dirty = true;
+
+	sctx->shader_userdata.atom.dirty = true;
+}
+
+static void si_shader_userdata_begin_new_cs(struct si_context *sctx)
+{
+	int i;
+
+	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		si_mark_shader_pointers_dirty(sctx, i);
+	}
+}
+
+/* Set a base register address for user data constants in the given shader.
+ * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
+ */
+static void si_set_user_data_base(struct si_context *sctx,
+				  unsigned shader, uint32_t new_base)
+{
+	uint32_t *base = &sctx->shader_userdata.sh_base[shader];
+
+	if (*base != new_base) {
+		*base = new_base;
+
+		if (new_base)
+			si_mark_shader_pointers_dirty(sctx, shader);
+	}
+}
+
+/* This must be called when these shaders are changed from non-NULL to NULL
+ * and vice versa:
+ * - geometry shader
+ * - tessellation control shader
+ * - tessellation evaluation shader
+ */
+void si_shader_change_notify(struct si_context *sctx)
+{
+	/* VS can be bound as VS or ES. */
+	if (sctx->gs_shader)
+		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+				      R_00B330_SPI_SHADER_USER_DATA_ES_0);
+	else
+		si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+				      R_00B130_SPI_SHADER_USER_DATA_VS_0);
+}
+
+static void si_emit_shader_pointer(struct si_context *sctx,
+				   struct si_descriptors *desc,
+				   unsigned sh_base, bool keep_dirty)
+{
+	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+	uint64_t va;
+
+	if (!desc->pointer_dirty || !desc->buffer)
+		return;
+
+	va = desc->buffer->gpu_address +
+	     desc->buffer_offset;
+
+	radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
+	radeon_emit(cs, (sh_base + desc->shader_userdata_offset - SI_SH_REG_OFFSET) >> 2);
+	radeon_emit(cs, va);
+	radeon_emit(cs, va >> 32);
+
+	desc->pointer_dirty = keep_dirty;
+}
+
+static void si_emit_shader_userdata(struct si_context *sctx,
+				    struct r600_atom *atom)
+{
+	unsigned i;
+	uint32_t *sh_base = sctx->shader_userdata.sh_base;
+
+	/* The VS copy shader needs these for clipping, streamout, and rings. */
+	if (sctx->gs_shader) {
+		unsigned base = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+		unsigned i = PIPE_SHADER_VERTEX;
+
+		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, true);
+		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, true);
+	}
+
+	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		unsigned base = sh_base[i];
+
+		si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
+		si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
+		si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
+		si_emit_shader_pointer(sctx, &sctx->samplers[i].states.desc, base, false);
+	}
+	si_emit_shader_pointer(sctx, &sctx->vertex_buffers, sh_base[PIPE_SHADER_VERTEX], false);
+}
+
+/* INIT/DEINIT/UPLOAD */

 void si_init_all_descriptors(struct si_context *sctx)
 {
 	int i;

 	for (i = 0; i < SI_NUM_SHADERS; i++) {
-		si_init_buffer_resources(sctx, &sctx->const_buffers[i],
-					 SI_NUM_CONST_BUFFERS, i, SI_SGPR_CONST,
+		si_init_buffer_resources(&sctx->const_buffers[i],
+					 SI_NUM_CONST_BUFFERS, SI_SGPR_CONST,
 					 RADEON_USAGE_READ, RADEON_PRIO_SHADER_BUFFER_RO);
-		si_init_buffer_resources(sctx, &sctx->rw_buffers[i],
-					 i == PIPE_SHADER_VERTEX ?
-					 SI_NUM_RW_BUFFERS : SI_NUM_RING_BUFFERS,
-					 i, SI_SGPR_RW_BUFFERS,
+		si_init_buffer_resources(&sctx->rw_buffers[i],
+					 SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
 					 RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW);

-		si_init_sampler_views(sctx, &sctx->samplers[i].views, i);
-
-		si_init_descriptors(sctx, &sctx->samplers[i].states.desc,
-				    si_get_shader_user_data_base(i) + SI_SGPR_SAMPLER * 4,
-				    4, SI_NUM_SAMPLER_STATES, si_emit_sampler_states);
-
-		sctx->atoms.s.const_buffers[i] = &sctx->const_buffers[i].desc.atom;
-		sctx->atoms.s.rw_buffers[i] = &sctx->rw_buffers[i].desc.atom;
-		sctx->atoms.s.sampler_views[i] = &sctx->samplers[i].views.desc.atom;
-		sctx->atoms.s.sampler_states[i] = &sctx->samplers[i].states.desc.atom;
+		si_init_descriptors(&sctx->samplers[i].views.desc,
+				    SI_SGPR_RESOURCE, 8, SI_NUM_SAMPLER_VIEWS);
+		si_init_descriptors(&sctx->samplers[i].states.desc,
+				    SI_SGPR_SAMPLER, 4, SI_NUM_SAMPLER_STATES);
 	}

-	si_init_descriptors(sctx, &sctx->vertex_buffers,
-			    si_get_shader_user_data_base(PIPE_SHADER_VERTEX) +
-			    SI_SGPR_VERTEX_BUFFER*4, 4, SI_NUM_VERTEX_BUFFERS,
-			    si_emit_shader_pointer);
-	sctx->atoms.s.vertex_buffers = &sctx->vertex_buffers.atom;
+	si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFER,
+			    4, SI_NUM_VERTEX_BUFFERS);

 	/* Set pipe_context functions. */
 	sctx->b.b.set_constant_buffer = si_set_constant_buffer;
@@ -1317,6 +1222,32 @@ void si_init_all_descriptors(struct si_context *sctx)
 	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.clear_buffer = si_clear_buffer;
 	sctx->b.invalidate_buffer = si_invalidate_buffer;
+
+	/* Shader user data. */
+	sctx->atoms.s.shader_userdata = &sctx->shader_userdata.atom;
+	sctx->shader_userdata.atom.emit = (void*)si_emit_shader_userdata;
+
+	/* Upper bound, 4 pointers per shader, +1 for vertex buffers, +2 for the VS copy shader. */
+	sctx->shader_userdata.atom.num_dw = (SI_NUM_SHADERS * 4 + 1 + 2) * 4;
+
+	/* Set default and immutable mappings. */
+	si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+	si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+	si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+}
+
+bool si_upload_shader_descriptors(struct si_context *sctx)
+{
+	int i;
+
+	for (i = 0; i < SI_NUM_SHADERS; i++) {
+		if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
+		    !si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
+		    !si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
+		    !si_upload_descriptors(sctx, &sctx->samplers[i].states.desc))
+			return false;
+	}
+	return si_upload_vertex_buffer_descriptors(sctx);
 }

 void si_release_all_descriptors(struct si_context *sctx)
@@ -1343,4 +1274,5 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
 		si_sampler_states_begin_new_cs(sctx, &sctx->samplers[i].states);
 	}
 	si_vertex_buffers_begin_new_cs(sctx);
+	si_shader_userdata_begin_new_cs(sctx);
 }
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -125,8 +125,6 @@ struct si_framebuffer {

 #define SI_NUM_ATOMS(sctx) (sizeof((sctx)->atoms)/sizeof((sctx)->atoms.array[0]))

-#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
-
 struct si_context {
 	struct r600_common_context	b;
 	struct blitter_context		*blitter;
@@ -141,13 +139,6 @@ struct si_context {
 	union {
 		struct {
 			/* The order matters. */
-			struct r600_atom *vertex_buffers;
-			struct r600_atom *const_buffers[SI_NUM_SHADERS];
-			struct r600_atom *rw_buffers[SI_NUM_SHADERS];
-			struct r600_atom *sampler_views[SI_NUM_SHADERS];
-			struct r600_atom *sampler_states[SI_NUM_SHADERS];
-			/* Caches must be flushed after resource descriptors are
-			 * updated in memory. */
 			struct r600_atom *cache_flush;
 			struct r600_atom *streamout_begin;
 			struct r600_atom *streamout_enable; /* must be after streamout_begin */
@@ -156,6 +147,7 @@ struct si_context {
 			struct r600_atom *db_render_state;
 			struct r600_atom *msaa_config;
 			struct r600_atom *clip_regs;
+			struct r600_atom *shader_userdata;
 		} s;
 		struct r600_atom *array[0];
 	} atoms;
@@ -169,6 +161,7 @@ struct si_context {
 	struct si_shader_selector	*gs_shader;
 	struct si_shader_selector	*vs_shader;
 	struct si_cs_shader_state	cs_shader_state;
+	struct si_shader_data		shader_userdata;
 	/* shader information */
 	unsigned			sprite_coord_enable;
 	bool				flatshade;
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2647,18 +2647,43 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
 	}
 }

+/* Upload shader code + rodata into a freshly created BO; returns 0 or -ENOMEM. */
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
+{
+	const struct radeon_shader_binary *binary = &shader->binary;
+	unsigned code_size = binary->code_size + binary->rodata_size;
+	unsigned char *ptr;
+
+	r600_resource_reference(&shader->bo, NULL);
+	shader->bo = si_resource_create_custom(&sscreen->b.b,
+					       PIPE_USAGE_IMMUTABLE, code_size);
+	if (!shader->bo)
+		return -ENOMEM;
+
+	ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
+					PIPE_TRANSFER_READ_WRITE);
+	util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
+	if (binary->rodata_size > 0) {
+		ptr += binary->code_size;
+		util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
+	}
+
+	sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
+	return 0;
+}
+
 int si_shader_binary_read(struct si_screen *sscreen,
 			struct si_shader *shader,
 			const struct radeon_shader_binary *binary)
 {

 	unsigned i;
-	unsigned code_size;
-	unsigned char *ptr;
 	bool dump  = r600_can_dump_shader(&sscreen->b,
 		shader->selector ? shader->selector->tokens : NULL);

 	si_shader_binary_read_config(sscreen, shader, 0);
+	if (si_shader_binary_upload(sscreen, shader) != 0)
+		return -ENOMEM; /* upload only fails on BO allocation; don't report success */

 	if (dump) {
 		if (!binary->disassembled) {
@@ -2676,26 +2701,6 @@ int si_shader_binary_read(struct si_screen *sscreen,
 			shader->num_sgprs, shader->num_vgprs, binary->code_size,
 			shader->lds_size, shader->scratch_bytes_per_wave);
 	}
-
-	/* copy new shader */
-	code_size = binary->code_size + binary->rodata_size;
-	r600_resource_reference(&shader->bo, NULL);
-	shader->bo = si_resource_create_custom(&sscreen->b.b, PIPE_USAGE_IMMUTABLE,
-					       code_size);
-	if (shader->bo == NULL) {
-		return -ENOMEM;
-	}
-
-
-	ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_READ_WRITE);
-	util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
-	if (binary->rodata_size > 0) {
-		ptr += binary->code_size;
-		util_memcpy_cpu_to_le32(ptr, binary->rodata, binary->rodata_size);
-	}
-
-	sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
-
 	return 0;
 }

--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -189,6 +189,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
 		    LLVMTargetMachineRef tm, LLVMModuleRef mod);
 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
 		const struct radeon_shader_binary *binary);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -30,6 +30,8 @@
 #include "si_pm4.h"
 #include "radeon/r600_pipe_common.h"

+#define SI_NUM_SHADERS (PIPE_SHADER_GEOMETRY+1)
+
 struct si_screen;
 struct si_shader;

@@ -111,6 +113,11 @@ union si_state {
 	struct si_pm4_state	*array[0];
 };

+struct si_shader_data {
+	struct r600_atom	atom;
+	uint32_t		sh_base[SI_NUM_SHADERS];
+};
+
 #define SI_NUM_USER_SAMPLERS            16 /* AKA OpenGL textures units per shader */
 #define SI_POLY_STIPPLE_SAMPLER         SI_NUM_USER_SAMPLERS
 #define SI_NUM_SAMPLERS                 (SI_POLY_STIPPLE_SAMPLER + 1)
@@ -144,59 +151,48 @@ union si_state {
 #define SI_NUM_VERTEX_BUFFERS	16


-/* This represents resource descriptors in memory, such as buffer resources,
+/* This represents descriptors in memory, such as buffer resources,
 * image resources, and sampler states.
 */
 struct si_descriptors {
-	struct r600_atom atom;
-
-	/* The size of one resource descriptor. */
+	/* The list of descriptors in malloc'd memory. */
+	uint32_t *list;
+	/* The size of one descriptor. */
 	unsigned element_dw_size;
-	/* The maximum number of resource descriptors. */
+	/* The maximum number of descriptors. */
 	unsigned num_elements;
+	/* Whether the list has been changed and should be re-uploaded. */
+	bool list_dirty;

-	/* The buffer where resource descriptors are stored. */
+	/* The buffer where the descriptors have been uploaded. */
 	struct r600_resource *buffer;
 	unsigned buffer_offset;

-	/* The i-th bit is set if that element is dirty (changed but not emitted). */
-	uint64_t dirty_mask;
 	/* The i-th bit is set if that element is enabled (non-NULL resource). */
 	uint64_t enabled_mask;

-	/* We can't update descriptors directly because the GPU might be
-	 * reading them at the same time, so we have to update them
-	 * in a copy-on-write manner. Each such copy is called a context,
-	 * which is just another array descriptors in the same buffer. */
-	unsigned current_context_id;
-	/* The size of a context, should be equal to 4*element_dw_size*num_elements. */
-	unsigned context_size;
-
-	/* The shader userdata register where the 64-bit pointer to the descriptor
+	/* The shader userdata offset within a shader where the 64-bit pointer to the descriptor
 	 * array will be stored. */
-	unsigned shader_userdata_reg;
+	unsigned shader_userdata_offset;
+	/* Whether the pointer should be re-emitted. */
+	bool pointer_dirty;
 };

 struct si_sampler_views {
 	struct si_descriptors		desc;
 	struct pipe_sampler_view	*views[SI_NUM_SAMPLER_VIEWS];
-	uint32_t			*desc_data[SI_NUM_SAMPLER_VIEWS];
 };

 struct si_sampler_states {
 	struct si_descriptors		desc;
-	uint32_t			*desc_data[SI_NUM_SAMPLER_STATES];
 	void				*saved_states[2]; /* saved for u_blitter */
 };

 struct si_buffer_resources {
 	struct si_descriptors		desc;
-	unsigned			num_buffers;
 	enum radeon_bo_usage		shader_usage; /* READ, WRITE, or READWRITE */
 	enum radeon_bo_priority		priority;
 	struct pipe_resource		**buffers; /* this has num_buffers elements */
-	uint32_t			*desc_storage; /* this has num_buffers*4 elements */
-	uint32_t			**desc_data; /* an array of pointers pointing to desc_storage */
 };

 #define si_pm4_block_idx(member) \
@@ -232,13 +228,13 @@ struct si_buffer_resources {
 /* si_descriptors.c */
 void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
 				unsigned start, unsigned count, void **states);
-void si_update_vertex_buffers(struct si_context *sctx);
 void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
 			bool add_tid, bool swizzle,
 			unsigned element_size, unsigned index_stride);
 void si_init_all_descriptors(struct si_context *sctx);
+bool si_upload_shader_descriptors(struct si_context *sctx);
 void si_release_all_descriptors(struct si_context *sctx);
 void si_all_descriptors_begin_new_cs(struct si_context *sctx);
 void si_copy_buffer(struct si_context *sctx,
@@ -246,6 +242,7 @@ void si_copy_buffer(struct si_context *sctx,
 		    uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
 			    const uint8_t *ptr, unsigned size, uint32_t *const_offset);
+void si_shader_change_notify(struct si_context *sctx);

 /* si_state.c */
 struct si_shader_selector;
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -139,6 +139,10 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
 		    (info->indirect || info->instance_count > 1))
 			wd_switch_on_eop = true;

+		/* USE_OPAQUE doesn't work when WD_SWITCH_ON_EOP is 0. */
+		if (info->count_from_stream_output)
+			wd_switch_on_eop = true;
+
 		/* If the WD switch is false, the IA switch must be false too. */
 		assert(wd_switch_on_eop || !ia_switch_on_eop);
 	}
@@ -245,8 +249,7 @@ static void si_emit_draw_packets(struct si_context *sctx,
 				 const struct pipe_index_buffer *ib)
 {
 	struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
-	unsigned sh_base_reg = (sctx->gs_shader ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
-						  R_00B130_SPI_SHADER_USER_DATA_VS_0);
+	unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX];

 	if (info->count_from_stream_output) {
 		struct r600_so_target *t =
@@ -536,11 +539,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->current_rast_prim = info->mode;

 	si_update_shaders(sctx);
-
-	if (sctx->vertex_buffers_dirty) {
-		si_update_vertex_buffers(sctx);
-		sctx->vertex_buffers_dirty = false;
-	}
+	if (!si_upload_shader_descriptors(sctx))
+		return;

 	if (info->indexed) {
 		/* Initialize the index buffer struct. */
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -535,6 +535,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_shader_selector *sel = state;
+	bool enable_changed = !!sctx->gs_shader != !!sel;

 	if (sctx->gs_shader == sel)
 		return;
@@ -542,6 +543,9 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 	sctx->gs_shader = sel;
 	sctx->clip_regs.dirty = true;
 	sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+
+	if (enable_changed)
+		si_shader_change_notify(sctx);
 }

 static void si_make_dummy_ps(struct si_context *sctx)
@@ -743,7 +747,6 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
 {
 	struct si_shader *shader;
 	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
-	unsigned char *ptr;

 	if (!sel)
 		return 0;
@@ -764,12 +767,7 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
 	si_shader_apply_scratch_relocs(sctx, shader, scratch_va);

 	/* Replace the shader bo with a new bo that has the relocs applied. */
-	r600_resource_reference(&shader->bo, NULL);
-	shader->bo = si_resource_create_custom(&sctx->screen->b.b, PIPE_USAGE_IMMUTABLE,
-					       shader->binary.code_size);
-	ptr = sctx->screen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
-	util_memcpy_cpu_to_le32(ptr, shader->binary.code, shader->binary.code_size);
-	sctx->screen->b.ws->buffer_unmap(shader->bo->cs_buf);
+	si_shader_binary_upload(sctx->screen, shader);

 	/* Update the shader state to use the new shader bo. */
 	si_shader_init_pm4_state(shader);
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -554,7 +554,7 @@ dri2_allocate_textures(struct dri_context *ctx,

         if (drawable->textures[statt]) {
            templ.format = drawable->textures[statt]->format;
-            templ.bind = drawable->textures[statt]->bind;
+            templ.bind = drawable->textures[statt]->bind & ~PIPE_BIND_SCANOUT;
            templ.nr_samples = drawable->stvis.samples;

            /* Try to reuse the resource.
--- a/src/gallium/state_trackers/osmesa/osmesa.c
+++ b/src/gallium/state_trackers/osmesa/osmesa.c
@@ -886,7 +886,7 @@ static struct name_function functions[] = {
   { "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext },
   { "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent },
   { "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext },
-   { "OSMesaPixelsStore", (OSMESAproc) OSMesaPixelStore },
+   { "OSMesaPixelStore", (OSMESAproc) OSMesaPixelStore },
   { "OSMesaGetIntegerv", (OSMESAproc) OSMesaGetIntegerv },
   { "OSMesaGetDepthBuffer", (OSMESAproc) OSMesaGetDepthBuffer },
   { "OSMesaGetColorBuffer", (OSMESAproc) OSMesaGetColorBuffer },
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -49,7 +49,8 @@ vlVdpVideoMixerCreate(VdpDevice device,
   vlVdpVideoMixer *vmixer = NULL;
   VdpStatus ret;
   struct pipe_screen *screen;
-   unsigned max_width, max_height, i;
+   uint32_t max_2d_texture_level;
+   unsigned max_size, i;

   vlVdpDevice *dev = vlGetDataHTAB(device);
   if (!dev)
@@ -134,18 +135,17 @@ vlVdpVideoMixerCreate(VdpDevice device,
      VDPAU_MSG(VDPAU_WARN, "[VDPAU] Max layers > 4 not supported\n", vmixer->max_layers);
      goto no_params;
   }
-   max_width = screen->get_video_param(screen, PIPE_VIDEO_PROFILE_UNKNOWN,
-                                       PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_MAX_WIDTH);
-   max_height = screen->get_video_param(screen, PIPE_VIDEO_PROFILE_UNKNOWN,
-                                        PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_MAX_HEIGHT);
-   if (vmixer->video_width < 48 ||
-       vmixer->video_width > max_width) {
-      VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u not valid for width\n", vmixer->video_width, max_width);
+
+   max_2d_texture_level = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+   max_size = 1u << (max_2d_texture_level - 1); /* 2^(levels-1) as an integer shift, no pow() */
+   if (vmixer->video_width < 48 || vmixer->video_width > max_size) {
+      VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u not valid for width\n",
+                vmixer->video_width, max_size);
      goto no_params;
   }
-   if (vmixer->video_height < 48 ||
-       vmixer->video_height > max_height) {
-      VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u  not valid for height\n", vmixer->video_height, max_height);
+   if (vmixer->video_height < 48 || vmixer->video_height > max_size) {
+      VDPAU_MSG(VDPAU_WARN, "[VDPAU] 48 < %u < %u  not valid for height\n",
+                vmixer->video_height, max_size);
      goto no_params;
   }
   vmixer->luma_key_min = 0.f;
--- a/src/gallium/state_trackers/xa/xa_context.c
+++ b/src/gallium/state_trackers/xa/xa_context.c
@@ -37,7 +37,11 @@
 XA_EXPORT void
 xa_context_flush(struct xa_context *ctx)
 {
-	ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
+    if (ctx->last_fence) {
+        struct pipe_screen *screen = ctx->xa->screen;
+        screen->fence_reference(screen, &ctx->last_fence, NULL);
+    }
+    ctx->pipe->flush(ctx->pipe, &ctx->last_fence, 0);
 }

 XA_EXPORT struct xa_context *
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -461,7 +461,7 @@ xa_surface_redefine(struct xa_surface *srf,
 			xa_min(save_height, template->height0), &src_box);
 	pipe->resource_copy_region(pipe, texture,
 				   0, 0, 0, 0, srf->tex, 0, &src_box);
-	pipe->flush(pipe, &xa->default_ctx->last_fence, 0);
+	xa_context_flush(xa->default_ctx);
    }

    pipe_resource_reference(&srf->tex, texture);
--- a/src/gallium/state_trackers/xa/xa_yuv.c
+++ b/src/gallium/state_trackers/xa/xa_yuv.c
@@ -154,7 +154,7 @@ xa_yuv_planar_blit(struct xa_context *r,
 	box++;
    }

-    r->pipe->flush(r->pipe, &r->last_fence, 0);
+    xa_context_flush(r);

    xa_ctx_sampler_views_destroy(r);
    xa_ctx_srf_destroy(r);
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -7,7 +7,7 @@ lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
 lib@OPENCL_LIBNAME@_la_LDFLAGS = \
 	$(LLVM_LDFLAGS) \
 	-no-undefined \
-	-version-number 1:0 \
+	-version-number @OPENCL_VERSION@:0 \
 	$(GC_SECTIONS) \
 	$(LD_NO_UNDEFINED)

--- a/src/gallium/targets/opencl/mesa.icd
+++ b/src/gallium/targets/opencl/mesa.icd
@@ -1 +0,0 @@
-libMesaOpenCL.so
--- a/src/gallium/targets/opencl/mesa.icd.in
+++ b/src/gallium/targets/opencl/mesa.icd.in
@@ -0,0 +1 @@
+lib@OPENCL_LIBNAME@.so.@OPENCL_VERSION@
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -120,7 +120,11 @@ nouveau_drm_screen_create(int fd)
 	if (!screen)
 		goto err;

-	util_hash_table_set(fd_tab, intptr_to_pointer(fd), screen);
+	/* Use dupfd in hash table, to avoid errors if the original fd gets
+	 * closed by its owner. The hash key needs to live at least as long as
+	 * the screen.
+	 */
+	util_hash_table_set(fd_tab, intptr_to_pointer(dupfd), screen);
 	screen->refcount = 1;
 	pipe_mutex_unlock(nouveau_screen_mutex);
 	return &screen->base;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -305,14 +305,34 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
    if (bo->ptr)
        os_munmap(bo->ptr, bo->base.size);

+    if (mgr->va) {
+        if (bo->rws->va_unmap_working) {
+            struct drm_radeon_gem_va va;
+
+            va.handle = bo->handle;
+            va.vm_id = 0;
+            va.operation = RADEON_VA_UNMAP;
+            va.flags = RADEON_VM_PAGE_READABLE |
+                       RADEON_VM_PAGE_WRITEABLE |
+                       RADEON_VM_PAGE_SNOOPED;
+            va.offset = bo->va;
+
+            if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_VA, &va,
+				    sizeof(va)) != 0 &&
+		va.operation == RADEON_VA_RESULT_ERROR) {
+                fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
+                fprintf(stderr, "radeon:    size      : %d bytes\n", bo->base.size);
+                fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
+            }
+	}
+
+	radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
+    }
+
    /* Close object. */
    args.handle = bo->handle;
    drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

-    if (mgr->va) {
-        radeon_bomgr_free_va(mgr, bo->va, bo->base.size);
-    }
-
    pipe_mutex_destroy(bo->map_mutex);

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -57,6 +57,8 @@
 #define RADEON_INFO_READ_REG		0x24
 #endif

+#define RADEON_INFO_VA_UNMAP_WORKING	0x25
+
 static struct util_hash_table *fd_tab = NULL;
 pipe_static_mutex(fd_tab_mutex);

@@ -399,6 +401,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
                                      &ib_vm_max_size))
                ws->info.r600_virtual_address = FALSE;
+            radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
+                                 &ws->va_unmap_working);
        }
 	if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", FALSE))
 		ws->info.r600_virtual_address = FALSE;
@@ -484,6 +488,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
    if (ws->gen >= DRV_R600) {
        radeon_surface_manager_free(ws->surf_man);
    }
+
+    if (ws->fd >= 0)
+        close(ws->fd);
+
    FREE(rws);
 }

@@ -696,7 +704,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
        return NULL;
    }

-    ws->fd = fd;
+    ws->fd = dup(fd);

    if (!do_winsys_init(ws))
        goto fail;
@@ -712,7 +720,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
        goto fail;

    if (ws->gen >= DRV_R600) {
-        ws->surf_man = radeon_surface_manager_new(fd);
+        ws->surf_man = radeon_surface_manager_new(ws->fd);
        if (!ws->surf_man)
            goto fail;
    }
@@ -753,7 +761,7 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
        return NULL;
    }

-    util_hash_table_set(fd_tab, intptr_to_pointer(fd), ws);
+    util_hash_table_set(fd_tab, intptr_to_pointer(ws->fd), ws);

    /* We must unlock the mutex once the winsys is fully initialized, so that
     * other threads attempting to create the winsys from the same fd will
@@ -770,6 +778,9 @@ fail:
        ws->kman->destroy(ws->kman);
    if (ws->surf_man)
        radeon_surface_manager_free(ws->surf_man);
+    if (ws->fd >= 0)
+        close(ws->fd);
+
    FREE(ws);
    return NULL;
 }
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -74,6 +74,7 @@ struct radeon_drm_winsys {
    enum radeon_generation gen;
    struct radeon_info info;
    uint32_t va_start;
+    uint32_t va_unmap_working;
    uint32_t accel_working2;

    struct pb_manager *kman;
--- a/src/glsl/ast_array_index.cpp
+++ b/src/glsl/ast_array_index.cpp
@@ -225,25 +225,25 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
       * values *do* diverge, then the behavior of the operation requiring a
       * dynamically uniform expression is undefined.
       */
-      if (array->type->element_type()->is_sampler()) {
-	 if (!state->is_version(130, 100)) {
-	    if (state->es_shader) {
-	       _mesa_glsl_warning(&loc, state,
-				  "sampler arrays indexed with non-constant "
-				  "expressions is optional in %s",
-				  state->get_version_string());
-	    } else {
-	       _mesa_glsl_warning(&loc, state,
-				  "sampler arrays indexed with non-constant "
-				  "expressions will be forbidden in GLSL 1.30 "
-				  "and later");
-	    }
-	 } else if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
-	    _mesa_glsl_error(&loc, state,
-			     "sampler arrays indexed with non-constant "
-			     "expressions is forbidden in GLSL 1.30 and "
-			     "later");
-	 }
+      if (array->type->without_array()->is_sampler()) {
+         if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
+            if (state->is_version(130, 300))
+               _mesa_glsl_error(&loc, state,
+                                "sampler arrays indexed with non-constant "
+                                "expressions are forbidden in GLSL %s "
+                                "and later",
+                                state->es_shader ? "ES 3.00" : "1.30");
+            else if (state->es_shader)
+               _mesa_glsl_warning(&loc, state,
+                                  "sampler arrays indexed with non-constant "
+                                  "expressions will be forbidden in GLSL "
+                                  "3.00 and later");
+            else
+               _mesa_glsl_warning(&loc, state,
+                                  "sampler arrays indexed with non-constant "
+                                  "expressions will be forbidden in GLSL "
+                                  "1.30 and later");
+         }
      }
   }

--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1074,9 +1074,9 @@ _token_list_equal_ignoring_space (token_list_t *a, token_list_t *b)
 		 */
 		if (node_a->token->type == SPACE
 		    && node_b->token->type == SPACE) {
-			while (node_a->token->type == SPACE)
+			while (node_a && node_a->token->type == SPACE)
 				node_a = node_a->next;
-			while (node_b->token->type == SPACE)
+			while (node_b && node_b->token->type == SPACE)
 				node_b = node_b->next;
 			continue;
 		}
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -63,8 +63,6 @@ update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to)
   case 3: m.w = from; break;
   default: assert(!"Should not get here.");
   }
-
-   m.num_components = MAX2(m.num_components, (to + 1));
 }

 void
@@ -95,6 +93,7 @@ ir_assignment::set_lhs(ir_rvalue *lhs)

 	 write_mask |= (((this->write_mask >> i) & 1) << c);
 	 update_rhs_swizzle(rhs_swiz, i, c);
+         rhs_swiz.num_components = swiz->val->type->vector_elements;
      }

      this->write_mask = write_mask;
@@ -114,6 +113,7 @@ ir_assignment::set_lhs(ir_rvalue *lhs)
 	 if (write_mask & (1 << i))
 	    update_rhs_swizzle(rhs_swiz, i, rhs_chan++);
      }
+      rhs_swiz.num_components = rhs_chan;
      this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
   }

--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -346,6 +346,39 @@ private:
   bool uses_non_zero_stream;
 };

+/* Visitor that detects sampler arrays indexed by a non-constant expression. */
+class dynamic_sampler_array_indexing_visitor : public ir_hierarchical_visitor
+{
+public:
+   dynamic_sampler_array_indexing_visitor()
+      : dynamic_sampler_array_indexing(false)
+   {
+   }
+
+   ir_visitor_status visit_enter(ir_dereference_array *ir)
+   {
+      /* Only dereferences rooted at a sampler-containing variable matter. */
+      if (ir->variable_referenced() == NULL ||
+          !ir->variable_referenced()->type->contains_sampler())
+         return visit_continue;
+
+      /* An index that folds to a constant is not dynamic; keep walking. */
+      if (ir->array_index->constant_expression_value() != NULL)
+         return visit_continue;
+
+      dynamic_sampler_array_indexing = true;
+      return visit_stop;
+   }
+
+   bool uses_dynamic_sampler_array_indexing()
+   {
+      return dynamic_sampler_array_indexing;
+   }
+
+private:
+   bool dynamic_sampler_array_indexing;
+};
+
 } /* anonymous namespace */

 void
@@ -2736,6 +2769,40 @@ build_program_resource_list(struct gl_context *ctx,
    */
 }

+/**
+ * Allow only constant-expression indexing of sampler arrays, plus
+ * "constant-index-expression" (loop induction variable) indexing.  Stages
+ * with EmitNoIndirectSampler set fail the link; others only get a warning.
+ */
+static bool
+validate_sampler_array_indexing(struct gl_context *ctx,
+                                struct gl_shader_program *prog)
+{
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (prog->_LinkedShaders[i] == NULL)
+         continue;
+
+      bool no_dynamic_indexing =
+         ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler;
+
+      /* Fresh visitor per stage: its flag is never reset once set. */
+      dynamic_sampler_array_indexing_visitor v;
+      v.run(prog->_LinkedShaders[i]->ir);
+      if (v.uses_dynamic_sampler_array_indexing()) {
+         const char *msg = "sampler arrays indexed with non-constant "
+                           "expressions is forbidden in GLSL %s %u";
+         /* Backend has indicated that it has no dynamic indexing support. */
+         if (no_dynamic_indexing) {
+            linker_error(prog, msg, prog->IsES ? "ES" : "", prog->Version);
+            return false;
+         } else {
+            linker_warning(prog, msg, prog->IsES ? "ES" : "", prog->Version);
+         }
+      }
+   }
+   return true;
+}
+

 void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
@@ -2954,6 +3021,16 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
      lower_const_arrays_to_uniforms(prog->_LinkedShaders[i]->ir);
   }

+   /* Validation for special cases where we allow sampler array indexing
+    * with a loop induction variable. This check emits a warning or an error
+    * depending on whether the backend can handle dynamic indexing.
+    */
+   if ((!prog->IsES && prog->Version < 130) ||
+       (prog->IsES && prog->Version < 300)) {
+      if (!validate_sampler_array_indexing(ctx, prog))
+         goto done;
+   }
+
   /* Check and validate stream emissions in geometry shaders */
   validate_geometry_shader_emissions(ctx, prog);

--- a/src/glsl/loop_unroll.cpp
+++ b/src/glsl/loop_unroll.cpp
@@ -100,6 +100,18 @@ public:

   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
   {
+      /* Force unroll in case of dynamic indexing with sampler arrays
+       * when EmitNoIndirectSampler is set.
+       */
+      if (options->EmitNoIndirectSampler) {
+         if ((ir->array->type->is_array() &&
+              ir->array->type->contains_sampler()) &&
+             !ir->array_index->constant_expression_value()) {
+            unsupported_variable_indexing = true;
+            return visit_continue;
+         }
+      }
+
      /* Check for arrays variably-indexed by a loop induction variable.
       * Unrolling the loop may convert that access into constant-indexing.
       *
--- a/src/glsl/opt_algebraic.cpp
+++ b/src/glsl/opt_algebraic.cpp
@@ -580,7 +580,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
            continue;

         ir_expression *add_expr = floor_expr->operands[0]->as_expression();
-         if (!add_expr)
+         if (!add_expr || add_expr->operation != ir_binop_add)
            continue;

         for (int j = 0; j < 2; j++) {
@@ -589,7 +589,7 @@ ir_algebraic_visitor::handle_expression(ir_expression *ir)
               continue;

            ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
-            if (!point_five->is_value(0.5, 0))
+            if (!point_five || !point_five->is_value(0.5, 0))
               continue;

            if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
--- a/src/loader/loader.c
+++ b/src/loader/loader.c
@@ -112,26 +112,36 @@ static void *udev_handle = NULL;
 static void *
 udev_dlopen_handle(void)
 {
-   if (!udev_handle) {
-      udev_handle = dlopen("libudev.so.1", RTLD_LOCAL | RTLD_LAZY);
+   char name[80];
+   unsigned flags = RTLD_NOLOAD | RTLD_LOCAL | RTLD_LAZY;
+   int version;

-      if (!udev_handle) {
-         /* libudev.so.1 changed the return types of the two unref functions
-          * from voids to pointers.  We don't use those return values, and the
-          * only ABI I've heard that cares about this kind of change (calling
-          * a function with a void * return that actually only returns void)
-          * might be ia64.
-          */
-         udev_handle = dlopen("libudev.so.0", RTLD_LOCAL | RTLD_LAZY);
+   /* libudev.so.1 changed the return types of the two unref functions
+    * from voids to pointers.  We don't use those return values, and the
+    * only ABI I've heard that cares about this kind of change (calling
+    * a function with a void * return that actually only returns void)
+    * might be ia64.
+    */

-         if (!udev_handle) {
-            log_(_LOADER_WARNING, "Couldn't dlopen libudev.so.1 or "
-                 "libudev.so.0, driver detection may be broken.\n");
-         }
+   /* First try opening an already loaded libudev (RTLD_NOLOAD), then try loading one */
+   do {
+      for (version = 1; version >= 0; version--) {
+         snprintf(name, sizeof(name), "libudev.so.%d", version);
+         udev_handle = dlopen(name, flags);
+         if (udev_handle)
+            return udev_handle;
      }
-   }

-   return udev_handle;
+      if ((flags & RTLD_NOLOAD) == 0)
+         break;
+
+      flags &= ~RTLD_NOLOAD;
+   } while (1);
+
+   log_(_LOADER_WARNING,
+        "Couldn't dlopen libudev.so.1 or "
+        "libudev.so.0, driver detection may be broken.\n");
+   return NULL;
 }

 static int dlsym_failed = 0;
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -40,8 +40,11 @@ gl_HEADERS = $(top_srcdir)/include/GL/*.h

 .PHONY: main/git_sha1.h.tmp
 main/git_sha1.h.tmp:
+	@# Don't assume that $(top_srcdir)/.git is a directory. It may be
+	@# a gitlink file if $(top_srcdir) is a submodule checkout or a linked
+	@# worktree.
 	@touch main/git_sha1.h.tmp
-	@if test -d $(top_srcdir)/.git; then \
+	@if test -e $(top_srcdir)/.git; then \
 		if which git > /dev/null; then \
 		    git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \
 			sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -728,7 +728,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
      save->DepthNear = ctx->ViewportArray[0].Near;
      save->DepthFar = ctx->ViewportArray[0].Far;
      /* set depth range to default */
-      _mesa_DepthRange(0.0, 1.0);
+      _mesa_set_depth_range(ctx, 0, 0.0, 1.0);
   }

   if (state & MESA_META_CLAMP_FRAGMENT_COLOR) {
@@ -1129,7 +1129,7 @@ _mesa_meta_end(struct gl_context *ctx)
         _mesa_set_viewport(ctx, 0, save->ViewportX, save->ViewportY,
                            save->ViewportW, save->ViewportH);
      }
-      _mesa_DepthRange(save->DepthNear, save->DepthFar);
+      _mesa_set_depth_range(ctx, 0, save->DepthNear, save->DepthFar);
   }

   if (state & MESA_META_CLAMP_FRAGMENT_COLOR &&
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -25,8 +25,10 @@
 *    Jason Ekstrand <jason.ekstrand@intel.com>
 */

+#include "blend.h"
 #include "bufferobj.h"
 #include "buffers.h"
+#include "clear.h"
 #include "fbobject.h"
 #include "glformats.h"
 #include "glheader.h"
@@ -247,6 +249,24 @@ fail:
   return success;
 }

+/* True if an integer format/type pair differs in signedness from rbFormat. */
+static bool
+need_signed_unsigned_int_conversion(mesa_format rbFormat,
+                                    GLenum format, GLenum type)
+{
+   const GLenum rb_datatype = _mesa_get_format_datatype(rbFormat);
+
+   if (!_mesa_is_enum_format_integer(format))
+      return false;
+
+   if (rb_datatype == GL_INT)
+      return type == GL_UNSIGNED_INT || type == GL_UNSIGNED_SHORT ||
+             type == GL_UNSIGNED_BYTE;
+   if (rb_datatype == GL_UNSIGNED_INT)
+      return type == GL_INT || type == GL_SHORT || type == GL_BYTE;
+   return false;
+}
+
 bool
 _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                              struct gl_texture_image *tex_image,
@@ -259,8 +279,9 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
   int full_height, image_height;
   struct gl_texture_image *pbo_tex_image;
   struct gl_renderbuffer *rb = NULL;
-   GLenum status;
-   bool success = false;
+   GLenum status, src_base_format;
+   bool success = false, clear_channels_to_zero = false;
+   float save_clear_color[4];
   int z;

   if (!_mesa_is_bufferobj(packing->BufferObj))
@@ -272,14 +293,27 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
       format == GL_COLOR_INDEX)
      return false;

-   if (ctx->_ImageTransferState)
-      return false;
-
-
+   /* Don't use the meta path for ReadPixels under the conditions below. */
   if (!tex_image) {
      rb = ctx->ReadBuffer->_ColorReadBuffer;
+
+      /* _mesa_get_readpixels_transfer_ops() includes the cases of read
+       * color clamping along with the ctx->_ImageTransferState.
+       */
+      if (_mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
+                                            type, GL_FALSE))
+         return false;
+
      if (_mesa_need_rgb_to_luminance_conversion(rb->Format, format))
         return false;
+
+      /* This function relies on BlitFramebuffer to fill in the pixel data for
+       * ReadPixels. However, BlitFramebuffer doesn't support signed to unsigned
+       * or unsigned to signed integer conversions. The OpenGL spec expects an
+       * invalid operation in that case.
+       */
+      if (need_signed_unsigned_int_conversion(rb->Format, format, type))
+         return false;
   }

   /* For arrays, use a tall (height * depth) 2D texture but taking into
@@ -299,6 +333,10 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
   _mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER |
                           MESA_META_PIXEL_STORE));

+   /* GL_CLAMP_FRAGMENT_COLOR doesn't affect ReadPixels and GetTexImage */
+   if (ctx->Extensions.ARB_color_buffer_float)
+      _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
+
   _mesa_GenFramebuffers(2, fbos);

   if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
@@ -344,6 +382,27 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                                  GL_COLOR_BUFFER_BIT, GL_NEAREST))
      goto fail;

+   src_base_format = tex_image ?
+                     tex_image->_BaseFormat :
+                     ctx->ReadBuffer->_ColorReadBuffer->_BaseFormat;
+
+   /* Depending on the base formats involved we might need to rebase some
+    * values. For example if we download from a Luminance format to RGBA
+    * format, we want G=0 and B=0.
+    */
+   clear_channels_to_zero =
+      _mesa_need_luminance_to_rgb_conversion(src_base_format,
+                                             pbo_tex_image->_BaseFormat);
+
+   if (clear_channels_to_zero) {
+      memcpy(save_clear_color, ctx->Color.ClearColor.f, 4 * sizeof(float));
+      /* Clear the Green, Blue channels. */
+      _mesa_ColorMask(GL_FALSE, GL_TRUE, GL_TRUE,
+                      src_base_format != GL_LUMINANCE_ALPHA);
+      _mesa_ClearColor(0.0, 0.0, 0.0, 1.0);
+      _mesa_Clear(GL_COLOR_BUFFER_BIT);
+   }
+
   for (z = 1; z < depth; z++) {
      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                tex_image, zoffset + z);
@@ -356,6 +415,15 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                                 0, z * image_height,
                                 width, z * image_height + height,
                                 GL_COLOR_BUFFER_BIT, GL_NEAREST);
+      if (clear_channels_to_zero)
+         _mesa_Clear(GL_COLOR_BUFFER_BIT);
+   }
+
+   /* Unmask the color channels and restore the saved clear color values. */
+   if (clear_channels_to_zero) {
+      _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+      _mesa_ClearColor(save_clear_color[0], save_clear_color[1],
+                       save_clear_color[2], save_clear_color[3]);
   }

   success = true;
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -255,6 +255,8 @@ i915CreateContext(int api,
    * FINISHME: vertex shaders?
    */
   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = true;
+   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoIndirectSampler =
+      true;

   struct gl_shader_compiler_options *const fs_options =
      & ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT];
@@ -266,6 +268,7 @@ i915CreateContext(int api,
   fs_options->EmitNoIndirectOutput = true;
   fs_options->EmitNoIndirectUniform = true;
   fs_options->EmitNoIndirectTemp = true;
+   fs_options->EmitNoIndirectSampler = true;

   ctx->Const.MaxDrawBuffers = 1;
   ctx->Const.QueryCounterBits.SamplesPassed = 0;
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -584,6 +584,10 @@ brw_initialize_context_constants(struct brw_context *brw)
 	 (i == MESA_SHADER_FRAGMENT);
      ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectUniform = false;
      ctx->Const.ShaderCompilerOptions[i].LowerClipDistance = true;
+
+      /* !ARB_gpu_shader5 */
+      if (brw->gen < 7)
+         ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
   }

   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS = true;
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -415,6 +415,7 @@ struct brw_wm_prog_data {
   bool uses_pos_offset;
   bool uses_omask;
   bool uses_kill;
+   bool pulls_bary;
   uint32_t prog_offset_16;

   /**
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -2124,6 +2124,7 @@ enum brw_pixel_shader_computed_depth_mode {
 # define GEN8_PSX_SHADER_DISABLES_ALPHA_TO_COVERAGE     (1 << 7)
 # define GEN8_PSX_SHADER_IS_PER_SAMPLE                  (1 << 6)
 # define GEN8_PSX_SHADER_COMPUTES_STENCIL               (1 << 5)
+# define GEN9_PSX_SHADER_PULLS_BARY                     (1 << 3)
 # define GEN8_PSX_SHADER_HAS_UAV                        (1 << 2)
 # define GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK       (1 << 1)

@@ -2470,12 +2471,13 @@ enum brw_wm_barycentric_interp_mode {
 #define BDW_MOCS_WT  0x58
 #define BDW_MOCS_PTE 0x18

-/* Skylake: MOCS is now an index into an array of 64 different configurable
- * cache settings.  We still use only either write-back or write-through; and
- * rely on the documented default values.
+/* Skylake: MOCS is now an index into an array of 62 different caching
+ * configurations programmed by the kernel.
 */
-#define SKL_MOCS_WB 9
-#define SKL_MOCS_WT 5
+/* TC=LLC/eLLC, LeCC=WB, LRUM=3, L3CC=WB */
+#define SKL_MOCS_WB  (2 << 1)
+/* TC=LLC/eLLC, LeCC=PTE, LRUM=3, L3CC=WB */
+#define SKL_MOCS_PTE (1 << 1)

 #define MEDIA_VFE_STATE                         0x7000
 /* GEN7 DW2, GEN8+ DW3 */
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -304,13 +304,13 @@ static const struct brw_device_info brw_device_info_chv = {
   .must_use_separate_stencil = true,               \
   .has_llc = true,                                 \
   .has_pln = true,                                 \
-   .max_vs_threads = 280,                           \
-   .max_gs_threads = 256,                           \
-   .max_wm_threads = 408,                           \
+   .max_vs_threads = 336,                           \
+   .max_gs_threads = 336,                           \
+   .max_wm_threads = 64 * 6,                        \
   .urb = {                                         \
-      .size = 128,                                  \
+      .size = 192,                                  \
      .min_vs_entries = 64,                         \
-      .max_vs_entries = 1664,                       \
+      .max_vs_entries = 1856,                       \
      .max_gs_entries = 640,                        \
   }

--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -973,6 +973,7 @@ fs_inst::regs_read(int arg) const
   case IMM:
      return 1;
   case GRF:
+   case ATTR:
   case HW_REG:
      if (src[arg].stride == 0) {
         return 1;
@@ -3623,10 +3624,16 @@ fs_visitor::lower_integer_multiplication()
               src1_1_w.fixed_hw_reg.dw1.ud >>= 16;
            } else {
               src1_0_w.type = BRW_REGISTER_TYPE_UW;
-               src1_0_w.stride = 2;
+               if (src1_0_w.stride != 0) {
+                  assert(src1_0_w.stride == 1);
+                  src1_0_w.stride = 2;
+               }

               src1_1_w.type = BRW_REGISTER_TYPE_UW;
-               src1_1_w.stride = 2;
+               if (src1_1_w.stride != 0) {
+                  assert(src1_1_w.stride == 1);
+                  src1_1_w.stride = 2;
+               }
               src1_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);
            }
            insert(MUL(low, inst->src[0], src1_0_w));
@@ -3636,10 +3643,16 @@ fs_visitor::lower_integer_multiplication()
            fs_reg src0_1_w = inst->src[0];

            src0_0_w.type = BRW_REGISTER_TYPE_UW;
-            src0_0_w.stride = 2;
+            if (src0_0_w.stride != 0) {
+               assert(src0_0_w.stride == 1);
+               src0_0_w.stride = 2;
+            }

            src0_1_w.type = BRW_REGISTER_TYPE_UW;
-            src0_1_w.stride = 2;
+            if (src0_1_w.stride != 0) {
+               assert(src0_1_w.stride == 1);
+               src0_1_w.stride = 2;
+            }
            src0_1_w.subreg_offset += type_sz(BRW_REGISTER_TYPE_UW);

            insert(MUL(low, src0_0_w, inst->src[1]));
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -89,12 +89,19 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
          * So, we need to copy from fs_reg(ATTR, var->location) to
          * offset(nir_inputs, var->data.driver_location).
          */
-         unsigned components = var->type->without_array()->components();
+         const glsl_type *const t = var->type->without_array();
+         const unsigned components = t->components();
+         const unsigned cols = t->matrix_columns;
+         const unsigned elts = t->vector_elements;
         unsigned array_length = var->type->is_array() ? var->type->length : 1;
         for (unsigned i = 0; i < array_length; i++) {
-            for (unsigned j = 0; j < components; j++) {
-               emit(MOV(retype(offset(input, components * i + j), type),
-                        offset(fs_reg(ATTR, var->data.location + i, type), j)));
+            for (unsigned j = 0; j < cols; j++) {
+               for (unsigned k = 0; k < elts; k++) {
+                  emit(MOV(offset(retype(input, type),
+                                  components * i + elts * j + k),
+                           offset(fs_reg(ATTR, var->data.location + i, type),
+                                  4 * j + k)));
+               }
            }
         }
         break;
@@ -1462,6 +1469,10 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
   case nir_intrinsic_interp_var_at_centroid:
   case nir_intrinsic_interp_var_at_sample:
   case nir_intrinsic_interp_var_at_offset: {
+      assert(stage == MESA_SHADER_FRAGMENT);
+
+      ((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
+
      /* in SIMD16 mode, the pixel interpolator returns coords interleaved
       * 8 channels at a time, same as the barycentric coords presented in
       * the FS payload. this requires a bit of extra work to support.
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1575,7 +1575,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
                                     uint32_t sampler)
 {
   fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
-   bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf;
+   bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf || op == ir_txs;

   if (has_lod && shadow_c.file != BAD_FILE)
      no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
@@ -1592,14 +1592,15 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
   fs_reg msg_end = offset(message, vector_elements);

   /* Messages other than sample and ld require all three components */
-   if (has_lod || shadow_c.file != BAD_FILE) {
+   if (vector_elements > 0 && (has_lod || shadow_c.file != BAD_FILE)) {
      for (int i = vector_elements; i < 3; i++) {
         emit(MOV(offset(message, i), fs_reg(0.0f)));
      }
+      msg_end = offset(message, 3);
   }

   if (has_lod) {
-      fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ?
+      fs_reg msg_lod = retype(msg_end, op == ir_txf ?
                              BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
      emit(MOV(msg_lod, lod));
      msg_end = offset(msg_lod, 1);
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -599,10 +599,13 @@ brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
            struct gl_shader *shader, struct gl_program *prog)
 {
   if (shader_prog) {
-      fprintf(stderr,
-              "GLSL IR for native %s shader %d:\n", stage, shader_prog->Name);
-      _mesa_print_ir(stderr, shader->ir, NULL);
-      fprintf(stderr, "\n\n");
+      if (shader->ir) {
+         fprintf(stderr,
+                 "GLSL IR for native %s shader %d:\n",
+                 stage, shader_prog->Name);
+         _mesa_print_ir(stderr, shader->ir, NULL);
+         fprintf(stderr, "\n\n");
+      }
   } else {
      fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
              stage, prog->Id, stage);
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -51,7 +51,7 @@ gen6_upload_blend_state(struct brw_context *brw)
    * with render target 0, which will reference BLEND_STATE[0] for
    * alpha test enable.
    */
-   if (nr_draw_buffers == 0 && ctx->Color.AlphaEnabled)
+   if (nr_draw_buffers == 0)
      nr_draw_buffers = 1;

   size = sizeof(*blend) * nr_draw_buffers;
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -58,6 +58,9 @@ gen8_upload_ps_extra(struct brw_context *brw,
   if (prog_data->uses_omask)
      dw1 |= GEN8_PSX_OMASK_TO_RENDER_TARGET;

+   if (brw->gen >= 9 && prog_data->pulls_bary)
+      dw1 |= GEN9_PSX_SHADER_PULLS_BARY;
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2));
   OUT_BATCH(dw1);
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -349,8 +349,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
      irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
   GLenum gl_target =
      rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
-   /* FINISHME: Use PTE MOCS on Skylake. */
-   uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_WT : BDW_MOCS_PTE;
+   const uint32_t mocs = brw->gen >= 9 ? SKL_MOCS_PTE : BDW_MOCS_PTE;

   intel_miptree_used_for_rendering(mt);

--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -22,12 +22,12 @@ extern "C" {
 *   - Disabling OA counters on Gen6+ (3 DWords = 12 bytes)
 *   - Ending MI_REPORT_PERF_COUNT on Gen5+, plus associated PIPE_CONTROLs:
 *     - Two sets of PIPE_CONTROLs, which become 3 PIPE_CONTROLs each on SNB,
- *       which are 4 DWords each ==> 2 * 3 * 4 * 4 = 96 bytes
+ *       which are 5 DWords each ==> 2 * 3 * 5 * 4 = 120 bytes
 *     - 3 DWords for MI_REPORT_PERF_COUNT itself on Gen6+.  ==> 12 bytes.
 *       On Ironlake, it's 6 DWords, but we have some slack due to the lack of
 *       Sandybridge PIPE_CONTROL madness.
 */
-#define BATCH_RESERVED 146
+#define BATCH_RESERVED 152

 struct intel_batchbuffer;

--- a/src/mesa/drivers/osmesa/osmesa.c
+++ b/src/mesa/drivers/osmesa/osmesa.c
@@ -1124,7 +1124,7 @@ static struct name_function functions[] = {
   { "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext },
   { "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent },
   { "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext },
-   { "OSMesaPixelsStore", (OSMESAproc) OSMesaPixelStore },
+   { "OSMesaPixelStore", (OSMESAproc) OSMesaPixelStore },
   { "OSMesaGetIntegerv", (OSMESAproc) OSMesaGetIntegerv },
   { "OSMesaGetDepthBuffer", (OSMESAproc) OSMesaGetDepthBuffer },
   { "OSMesaGetColorBuffer", (OSMESAproc) OSMesaGetColorBuffer },
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2893,6 +2893,7 @@ struct gl_shader_compiler_options
   GLboolean EmitNoIndirectOutput;  /**< No indirect addressing of outputs */
   GLboolean EmitNoIndirectTemp;    /**< No indirect addressing of temps */
   GLboolean EmitNoIndirectUniform; /**< No indirect addressing of constants */
+   GLboolean EmitNoIndirectSampler; /**< No indirect addressing of samplers */
   /*@}*/

   GLuint MaxIfDepth;               /**< Maximum nested IF blocks */
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -60,13 +60,33 @@ _mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format)
           format == GL_LUMINANCE_ALPHA_INTEGER_EXT);
 }

+/**
+ * Return true if an L, LA or I source needs conversion to an RGB destination.
+ */
+GLboolean
+_mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat,
+                                       GLenum dstBaseFormat)
+{
+   if (srcBaseFormat != GL_LUMINANCE &&
+       srcBaseFormat != GL_LUMINANCE_ALPHA &&
+       srcBaseFormat != GL_INTENSITY)
+      return GL_FALSE;
+
+   /* Any destination that carries a green or blue channel qualifies. */
+   return dstBaseFormat == GL_GREEN || dstBaseFormat == GL_BLUE ||
+          dstBaseFormat == GL_RG ||
+          dstBaseFormat == GL_RGB || dstBaseFormat == GL_BGR ||
+          dstBaseFormat == GL_RGBA || dstBaseFormat == GL_BGRA;
+}

 /**
 * Return transfer op flags for this ReadPixels operation.
 */
-static GLbitfield
-get_readpixels_transfer_ops(const struct gl_context *ctx, mesa_format texFormat,
-                            GLenum format, GLenum type, GLboolean uses_blit)
+GLbitfield
+_mesa_get_readpixels_transfer_ops(const struct gl_context *ctx,
+                                  mesa_format texFormat,
+                                  GLenum format, GLenum type,
+                                  GLboolean uses_blit)
 {
   GLbitfield transferOps = ctx->_ImageTransferState;

@@ -169,8 +189,8 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
      }

      /* And finally, see if there are any transfer ops. */
-      return get_readpixels_transfer_ops(ctx, rb->Format, format, type,
-                                         uses_blit) != 0;
+      return _mesa_get_readpixels_transfer_ops(ctx, rb->Format, format, type,
+                                               uses_blit) != 0;
   }
   return GL_FALSE;
 }
@@ -436,8 +456,8 @@ read_rgba_pixels( struct gl_context *ctx,
   if (!rb)
      return;

-   transferOps = get_readpixels_transfer_ops(ctx, rb->Format, format, type,
-                                             GL_FALSE);
+   transferOps = _mesa_get_readpixels_transfer_ops(ctx, rb->Format, format,
+                                                   type, GL_FALSE);
   /* Describe the dst format */
   dst_is_integer = _mesa_is_enum_format_integer(format);
   dst_stride = _mesa_image_row_stride(packing, width, format, type);
--- a/src/mesa/main/readpix.h
+++ b/src/mesa/main/readpix.h
@@ -40,6 +40,16 @@ _mesa_readpixels_needs_slow_path(const struct gl_context *ctx, GLenum format,
 extern GLboolean
 _mesa_need_rgb_to_luminance_conversion(mesa_format texFormat, GLenum format);

+extern GLboolean
+_mesa_need_luminance_to_rgb_conversion(GLenum srcBaseFormat,
+                                       GLenum dstBaseFormat);
+
+extern GLbitfield
+_mesa_get_readpixels_transfer_ops(const struct gl_context *ctx,
+                                  mesa_format texFormat,
+                                  GLenum format, GLenum type,
+                                  GLboolean uses_blit);
+
 extern void
 _mesa_readpixels(struct gl_context *ctx,
                 GLint x, GLint y, GLsizei width, GLsizei height,
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1692,12 +1692,23 @@ _mesa_ShaderBinary(GLint n, const GLuint* shaders, GLenum binaryformat,
                   const void* binary, GLint length)
 {
   GET_CURRENT_CONTEXT(ctx);
-   (void) n;
   (void) shaders;
   (void) binaryformat;
   (void) binary;
-   (void) length;
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glShaderBinary");
+
+   /* Page 68, section 7.2 "Shader Binaries" of the OpenGL ES 3.1, and
+    * page 88 of the OpenGL 4.5 specs state:
+    *
+    *     "An INVALID_VALUE error is generated if count or length is negative.
+    *      An INVALID_ENUM error is generated if binaryformat is not a supported
+    *      format returned in SHADER_BINARY_FORMATS."
+    */
+   if (n < 0 || length < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glShaderBinary(count or length < 0)");
+      return;
+   }
+
+   _mesa_error(ctx, GL_INVALID_ENUM, "glShaderBinary(format)");
 }


--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -2479,13 +2479,6 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
      return GL_TRUE;
   }

-   /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)",
-                  callerName, _mesa_lookup_enum_by_nr(target));
-      return GL_TRUE;
-   }
-
   /* level check */
   if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) {
      _mesa_error(ctx, GL_INVALID_VALUE, "%s(level=%d)", callerName, level);
@@ -2629,13 +2622,6 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
      return GL_TRUE;
   }

-   rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
-   if (rb == NULL) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCopyTexImage%dD(read buffer)", dimensions);
-      return GL_TRUE;
-   }
-
   /* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
    * internalFormat.
    */
@@ -2648,7 +2634,7 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,
      case GL_LUMINANCE_ALPHA:
         break;
      default:
-         _mesa_error(ctx, GL_INVALID_VALUE,
+         _mesa_error(ctx, GL_INVALID_ENUM,
                     "glCopyTexImage%dD(internalFormat=%s)", dimensions,
                     _mesa_lookup_enum_by_nr(internalFormat));
         return GL_TRUE;
@@ -2657,12 +2643,19 @@ copytexture_error_check( struct gl_context *ctx, GLuint dimensions,

   baseFormat = _mesa_base_tex_format(ctx, internalFormat);
   if (baseFormat < 0) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
+      _mesa_error(ctx, GL_INVALID_ENUM,
                  "glCopyTexImage%dD(internalFormat=%s)", dimensions,
                  _mesa_lookup_enum_by_nr(internalFormat));
      return GL_TRUE;
   }

+   rb = _mesa_get_read_renderbuffer_for_format(ctx, internalFormat);
+   if (rb == NULL) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glCopyTexImage%dD(read buffer)", dimensions);
+      return GL_TRUE;
+   }
+
   rb_internal_format = rb->InternalFormat;
   rb_base_format = _mesa_base_tex_format(ctx, rb->InternalFormat);
   if (_mesa_is_color_format(internalFormat)) {
@@ -3515,14 +3508,6 @@ _mesa_texture_sub_image(struct gl_context *ctx, GLuint dims,
 {
   FLUSH_VERTICES(ctx, 0);

-   /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dims, target, dsa)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glTex%sSubImage%uD(target=%s)",
-                  dsa ? "ture" : "",
-                  dims, _mesa_lookup_enum_by_nr(target));
-      return;
-   }
-
   if (ctx->NewState & _NEW_PIXEL)
      _mesa_update_state(ctx);

@@ -3572,6 +3557,13 @@ texsubimage(struct gl_context *ctx, GLuint dims, GLenum target, GLint level,
   struct gl_texture_object *texObj;
   struct gl_texture_image *texImage;

+   /* check target (proxies not allowed) */
+   if (!legal_texsubimage_target(ctx, dims, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "glTexSubImage%uD(target=%s)",
+                  dims, _mesa_lookup_enum_by_nr(target));
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -3632,6 +3624,13 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
      return;
   }

+   /* check target (proxies not allowed) */
+   if (!legal_texsubimage_target(ctx, dims, texObj->Target, true)) {
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s(target=%s)",
+                  callerName, _mesa_lookup_enum_by_nr(texObj->Target));
+      return;
+   }
+
   if (texsubimage_error_check(ctx, dims, texObj, texObj->Target, level,
                               xoffset, yoffset, zoffset,
                               width, height, depth, format, type,
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -787,6 +787,7 @@ texstore_rgba(TEXSTORE_PARAMS)
      srcType = GL_FLOAT;
      srcRowStride = srcWidth * 4 * sizeof(float);
      srcMesaFormat = RGBA32_FLOAT;
+      srcPacking = &ctx->DefaultPacking;
   }

   src = (GLubyte *)
--- a/src/mesa/program/prog_opt_constant_fold.c
+++ b/src/mesa/program/prog_opt_constant_fold.c
@@ -38,6 +38,8 @@ src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
   for (i = 0; i < num_srcs; i++) {
      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
 	 return false;
+      if (inst->SrcReg[i].RelAddr)
+         return false;
   }

   return true;
--- a/src/mesa/program/program_parse_extra.c
+++ b/src/mesa/program/program_parse_extra.c
@@ -163,6 +163,8 @@ _mesa_ARBvp_parse_option(struct asm_parser_state *state, const char *option)
 int
 _mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option)
 {
+   unsigned fog_option;
+
   /* All of the options currently supported start with "ARB_".  The code is
    * currently structured with nested if-statements because eventually options
    * that start with "NV_" will be supported.  This structure will result in
@@ -177,20 +179,42 @@ _mesa_ARBfp_parse_option(struct asm_parser_state *state, const char *option)
      if (strncmp(option, "fog_", 4) == 0) {
 	 option += 4;

-	 if (state->option.Fog == OPTION_NONE) {
-	    if (strcmp(option, "exp") == 0) {
-	       state->option.Fog = OPTION_FOG_EXP;
-	       return 1;
-	    } else if (strcmp(option, "exp2") == 0) {
-	       state->option.Fog = OPTION_FOG_EXP2;
-	       return 1;
-	    } else if (strcmp(option, "linear") == 0) {
-	       state->option.Fog = OPTION_FOG_LINEAR;
-	       return 1;
-	    }
-	 }
+         if (strcmp(option, "exp") == 0) {
+            fog_option = OPTION_FOG_EXP;
+         } else if (strcmp(option, "exp2") == 0) {
+            fog_option = OPTION_FOG_EXP2;
+         } else if (strcmp(option, "linear") == 0) {
+            fog_option = OPTION_FOG_LINEAR;
+         } else {
+            /* invalid option */
+            return 0;
+         }

-	 return 0;
+         if (state->option.Fog == OPTION_NONE) {
+            state->option.Fog = fog_option;
+            return 1;
+         }
+
+         /* The ARB_fragment_program specification instructs us to handle
+          * redundant options in two seemingly contradictory ways:
+          *
+          * Section 3.11.4.5.1 says:
+          * "Only one fog application option may be specified by any given
+          *  fragment program.  A fragment program that specifies more than one
+          *  of the program options "ARB_fog_exp", "ARB_fog_exp2", and
+          *  "ARB_fog_linear", will fail to load."
+          *
+          * Issue 27 says:
+          * "The three mandatory options are ARB_fog_exp, ARB_fog_exp2, and
+          *  ARB_fog_linear.  As these options are mutually exclusive by
+          *  nature, specifying more than one is not useful.  If more than one
+          *  is specified, the last one encountered in the <optionSequence>
+          *  will be the one to actually modify the execution environment."
+          *
+          * We choose to allow programs to specify the same OPTION redundantly,
+          * but fail to load programs that specify contradictory options.
+          */
+         return state->option.Fog == fog_option ? 1 : 0;
      } else if (strncmp(option, "precision_hint_", 15) == 0) {
 	 option += 15;

--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -478,7 +478,7 @@ const struct st_tracked_state st_update_fragment_texture = {
   "st_update_texture",					/* name */
   {							/* dirty */
      _NEW_TEXTURE,					/* mesa */
-      ST_NEW_FRAGMENT_PROGRAM,				/* st */
+      ST_NEW_FRAGMENT_PROGRAM | ST_NEW_SAMPLER_VIEWS,	/* st */
   },
   update_fragment_textures				/* update */
 };
@@ -488,7 +488,7 @@ const struct st_tracked_state st_update_vertex_texture = {
   "st_update_vertex_texture",				/* name */
   {							/* dirty */
      _NEW_TEXTURE,					/* mesa */
-      ST_NEW_VERTEX_PROGRAM,				/* st */
+      ST_NEW_VERTEX_PROGRAM | ST_NEW_SAMPLER_VIEWS,	/* st */
   },
   update_vertex_textures				/* update */
 };
@@ -498,7 +498,7 @@ const struct st_tracked_state st_update_geometry_texture = {
   "st_update_geometry_texture",			/* name */
   {							/* dirty */
      _NEW_TEXTURE,					/* mesa */
-      ST_NEW_GEOMETRY_PROGRAM,				/* st */
+      ST_NEW_GEOMETRY_PROGRAM | ST_NEW_SAMPLER_VIEWS,	/* st */
   },
   update_geometry_textures				/* update */
 };
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -39,7 +39,7 @@
 #include "st_cb_bitmap.h"
 #include "st_cb_blit.h"
 #include "st_cb_fbo.h"
-#include "st_atom.h"
+#include "st_manager.h"

 #include "util/u_format.h"

@@ -92,7 +92,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
   } clip;
   struct pipe_blit_info blit;

-   st_validate_state(st);
+   st_manager_validate_framebuffers(st);

   /* Make sure bitmap rendering has landed in the framebuffers */
   st_flush_bitmap_cache(st);
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -287,6 +287,11 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
   /* For vertex shaders, make sure not to emit saturate when SM 3.0 is not supported */
   ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitNoSat = !st->has_shader_model3;

+   if (!ctx->Extensions.ARB_gpu_shader5) {
+      for (i = 0; i < MESA_SHADER_STAGES; i++)
+         ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler = true;
+   }
+
   _mesa_compute_version(ctx);

   if (ctx->Version == 0) {
@@ -308,6 +313,7 @@ static void st_init_driver_flags(struct gl_driver_flags *f)
   f->NewArray = ST_NEW_VERTEX_ARRAYS;
   f->NewRasterizerDiscard = ST_NEW_RASTERIZER;
   f->NewUniformBuffer = ST_NEW_UNIFORM_BUFFER;
+   f->NewTextureBuffer = ST_NEW_SAMPLER_VIEWS;
 }

 struct st_context *st_create_context(gl_api api, struct pipe_context *pipe,
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -58,6 +58,8 @@ struct u_upload_mgr;
 #define ST_NEW_VERTEX_ARRAYS           (1 << 6)
 #define ST_NEW_RASTERIZER              (1 << 7)
 #define ST_NEW_UNIFORM_BUFFER          (1 << 8)
+/* gap for TESSCTRL/TESSEVAL that's in master only */
+#define ST_NEW_SAMPLER_VIEWS           (1 << 11)


 struct st_state_flags {
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -36,7 +36,6 @@ libmesautil_la_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/gallium/include \
 	-I$(top_srcdir)/src/gallium/auxiliary \
-	$(SHA1_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(MSVC2008_COMPAT_CFLAGS)

@@ -44,12 +43,6 @@ libmesautil_la_SOURCES = \
 	$(MESA_UTIL_FILES) \
 	$(MESA_UTIL_GENERATED_FILES)

-if ENABLE_SHADER_CACHE
-libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
-endif
-
-libmesautil_la_LIBADD = $(SHA1_LIBS)
-
 roundeven_test_LDADD = -lm

 check_PROGRAMS = u_atomic_test roundeven_test
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -1,7 +1,3 @@
-MESA_UTIL_SHADER_CACHE_FILES := \
-	mesa-sha1.c \
-	mesa-sha1.h
-
 MESA_UTIL_FILES :=	\
 	bitset.h \
 	format_srgb.h \
--- a/src/util/SConscript
+++ b/src/util/SConscript
@@ -33,11 +33,6 @@ mesautil_sources = (
    source_lists['MESA_UTIL_GENERATED_FILES']
 )

-# XXX We don't yet have scons support for detecting any of the various
-# HAVE_SHA1_* definitions, so for now simply disable the shader cache.
-if False:
-    mesautil_sources += source_lists['MESA_UTIL_SHADER_CACHE_FILES']
-
 mesautil = env.ConvenienceLibrary(
    target = 'mesautil',
    source = mesautil_sources,
--- a/src/util/mesa-sha1.c
+++ b/src/util/mesa-sha1.c
@@ -1,316 +0,0 @@
-/* Copyright © 2007 Carl Worth
- * Copyright © 2009 Jeremy Huddleston, Julien Cristau, and Matthieu Herrb
- * Copyright © 2009-2010 Mikhail Gusarov
- * Copyright © 2012 Yaakov Selkowitz and Keith Packard
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "mesa-sha1.h"
-
-#if defined(HAVE_SHA1_IN_LIBMD)  /* Use libmd for SHA1 */ \
-	|| defined(HAVE_SHA1_IN_LIBC)   /* Use libc for SHA1 */
-
-#include <sha1.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   SHA1_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   SHA1Init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
-
-   SHA1Update(sha1_ctx, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
-
-   SHA1Final(result, sha1_ctx);
-   free(sha1_ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_COMMONCRYPTO)        /* Use CommonCrypto for SHA1 */
-
-#include <CommonCrypto/CommonDigest.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   CC_SHA1_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   CC_SHA1_Init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
-
-   CC_SHA1_Update(sha1_ctx, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
-
-   CC_SHA1_Final(result, sha1_ctx);
-   free(sha1_ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_CRYPTOAPI)        /* Use CryptoAPI for SHA1 */
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <wincrypt.h>
-
-static HCRYPTPROV hProv;
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   HCRYPTHASH *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   CryptAcquireContext(&hProv, NULL, MS_DEF_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
-   CryptCreateHash(hProv, CALG_SHA1, 0, 0, ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
-
-   CryptHashData(*hHash, data, size, 0);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
-   DWORD len = 20;
-
-   CryptGetHashParam(*hHash, HP_HASHVAL, result, &len, 0);
-   CryptDestroyHash(*hHash);
-   CryptReleaseContext(hProv, 0);
-   free(ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBNETTLE)   /* Use libnettle for SHA1 */
-
-#include <nettle/sha.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   struct sha1_ctx *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   sha1_init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   sha1_update((struct sha1_ctx *) ctx, size, data);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   sha1_digest((struct sha1_ctx *) ctx, 20, result);
-   free(ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBGCRYPT)   /* Use libgcrypt for SHA1 */
-
-#include <gcrypt.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   static int init;
-   gcry_md_hd_t h;
-   gcry_error_t err;
-
-   if (!init) {
-      if (!gcry_check_version(NULL))
-         return NULL;
-      gcry_control(GCRYCTL_DISABLE_SECMEM, 0);
-      gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
-      init = 1;
-   }
-
-   err = gcry_md_open(&h, GCRY_MD_SHA1, 0);
-   if (err)
-      return NULL;
-   return (struct mesa_sha1 *) h;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
-
-   gcry_md_write(h, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
-
-   memcpy(result, gcry_md_read(h, GCRY_MD_SHA1), 20);
-   gcry_md_close(h);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBSHA1)     /* Use libsha1 */
-
-#include <libsha1.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   sha1_ctx *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   sha1_begin(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   sha1_hash(data, size, (sha1_ctx *) ctx);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   sha1_end(result, (sha1_ctx *) ctx);
-   free(ctx);
-   return 1;
-}
-
-#else                           /* Use OpenSSL's libcrypto */
-
-#include <stddef.h>             /* buggy openssl/sha.h wants size_t */
-#include <openssl/sha.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   int ret;
-   SHA_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   ret = SHA1_Init(ctx);
-   if (!ret) {
-      free(ctx);
-      return NULL;
-   }
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   int ret;
-   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
-
-   ret = SHA1_Update(sha_ctx, data, size);
-   if (!ret)
-      free(sha_ctx);
-   return ret;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   int ret;
-   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
-
-   ret = SHA1_Final(result, (SHA_CTX *) sha_ctx);
-   free(sha_ctx);
-   return ret;
-}
-
-#endif
-
-void
-_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20])
-{
-   struct mesa_sha1 *ctx;
-
-   ctx = _mesa_sha1_init();
-   _mesa_sha1_update(ctx, data, size);
-   _mesa_sha1_final(ctx, result);
-}
-
-char *
-_mesa_sha1_format(char *buf, const unsigned char *sha1)
-{
-   static const char hex_digits[] = "0123456789abcdef";
-   int i;
-
-   for (i = 0; i < 40; i += 2) {
-      buf[i] = hex_digits[sha1[i >> 1] >> 4];
-      buf[i + 1] = hex_digits[sha1[i >> 1] & 0x0f];
-   }
-   buf[i] = '\0';
-
-   return buf;
-}
--- a/src/util/mesa-sha1.h
+++ b/src/util/mesa-sha1.h
@@ -1,53 +0,0 @@
-/* Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SHA1_H
-#define SHA1_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdlib.h>
-
-struct mesa_sha1;
-
-struct mesa_sha1 *
-_mesa_sha1_init(void);
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size);
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20]);
-
-char *
-_mesa_sha1_format(char *buf, const unsigned char *sha1);
-
-void
-_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20]);
-
-#ifdef __cplusplus
-} /* extern C */
-#endif
-
-#endif
@@ -1 +1 @@
 .6.1
 .6.4