docs: Update 11.0.0 release notes

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 11.0.0(final)
2015-09-12 13:06:25 +01:00 · 2015-09-12 10:21:51 +01:00 · 2015-09-11 19:47:09 +01:00 · 2015-09-11 19:46:16 +01:00 · 2015-09-11 19:43:42 +01:00 · 2015-09-11 19:19:32 +01:00
126 changed files with 1549 additions and 1148 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -42,6 +42,7 @@ LOCAL_CFLAGS += \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

 LOCAL_CFLAGS += \
+	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
 	-DHAVE___BUILTIN_FFSLL \
@@ -70,7 +71,7 @@ endif

 ifeq ($(MESA_ENABLE_LLVM),true)
 LOCAL_CFLAGS += \
-	-DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
+	-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
 	-D__STDC_CONSTANT_MACROS \
 	-D__STDC_FORMAT_MACROS \
 	-D__STDC_LIMIT_MACROS
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.0-devel
+11.0.0
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,6 @@ dnl Copyright © 2009-2014 Jon TURNEY
 dnl Copyright © 2011-2012 Benjamin Franzke
 dnl Copyright © 2008-2014 David Airlie
 dnl Copyright © 2009-2013 Brian Paul
-dnl Copyright © 2003-2007 Keith Packard, Daniel Stone
 dnl
 dnl Permission is hereby granted, free of charge, to any person obtaining a
 dnl copy of this software and associated documentation files (the "Software"),
@@ -988,144 +987,6 @@ fi

 AC_SUBST([MESA_LLVM])

-# SHA1 hashing
-AC_ARG_WITH([sha1],
-        [AS_HELP_STRING([--with-sha1=libc|libmd|libnettle|libgcrypt|libcrypto|libsha1|CommonCrypto|CryptoAPI],
-        [choose SHA1 implementation])])
-case "x$with_sha1" in
-x | xlibc | xlibmd | xlibnettle | xlibgcrypt | xlibcrypto | xlibsha1 | xCommonCrypto | xCryptoAPI)
-  ;;
-*)
-        AC_MSG_ERROR([Illegal value for --with-sha1: $with_sha1])
-esac
-
-AC_CHECK_FUNC([SHA1Init], [HAVE_SHA1_IN_LIBC=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_LIBC" = xyes; then
-	with_sha1=libc
-fi
-if test "x$with_sha1" = xlibc && test "x$HAVE_SHA1_IN_LIBC" != xyes; then
-	AC_MSG_ERROR([sha1 in libc requested but not found])
-fi
-if test "x$with_sha1" = xlibc; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBC], [1],
-		[Use libc SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_FUNC([CC_SHA1_Init], [HAVE_SHA1_IN_COMMONCRYPTO=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_COMMONCRYPTO" = xyes; then
-	with_sha1=CommonCrypto
-fi
-if test "x$with_sha1" = xCommonCrypto && test "x$HAVE_SHA1_IN_COMMONCRYPTO" != xyes; then
-	AC_MSG_ERROR([CommonCrypto requested but not found])
-fi
-if test "x$with_sha1" = xCommonCrypto; then
-	AC_DEFINE([HAVE_SHA1_IN_COMMONCRYPTO], [1],
-		[Use CommonCrypto SHA1 functions])
-	SHA1_LIBS=""
-fi
-dnl stdcall functions cannot be tested with AC_CHECK_LIB
-AC_CHECK_HEADER([wincrypt.h], [HAVE_SHA1_IN_CRYPTOAPI=yes], [], [#include <windows.h>])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_CRYPTOAPI" = xyes; then
-	with_sha1=CryptoAPI
-fi
-if test "x$with_sha1" = xCryptoAPI && test "x$HAVE_SHA1_IN_CRYPTOAPI" != xyes; then
-	AC_MSG_ERROR([CryptoAPI requested but not found])
-fi
-if test "x$with_sha1" = xCryptoAPI; then
-	AC_DEFINE([HAVE_SHA1_IN_CRYPTOAPI], [1],
-		[Use CryptoAPI SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_LIB([md], [SHA1Init], [HAVE_LIBMD=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBMD" = xyes; then
-	with_sha1=libmd
-fi
-if test "x$with_sha1" = xlibmd && test "x$HAVE_LIBMD" != xyes; then
-	AC_MSG_ERROR([libmd requested but not found])
-fi
-if test "x$with_sha1" = xlibmd; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBMD], [1],
-	          [Use libmd SHA1 functions])
-	SHA1_LIBS=-lmd
-fi
-PKG_CHECK_MODULES([LIBSHA1], [libsha1], [HAVE_LIBSHA1=yes], [HAVE_LIBSHA1=no])
-if test "x$with_sha1" = x && test "x$HAVE_LIBSHA1" = xyes; then
-   with_sha1=libsha1
-fi
-if test "x$with_sha1" = xlibsha1 && test "x$HAVE_LIBSHA1" != xyes; then
-	AC_MSG_ERROR([libsha1 requested but not found])
-fi
-if test "x$with_sha1" = xlibsha1; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBSHA1], [1],
-	          [Use libsha1 for SHA1])
-	SHA1_LIBS=-lsha1
-fi
-AC_CHECK_LIB([nettle], [nettle_sha1_init], [HAVE_LIBNETTLE=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBNETTLE" = xyes; then
-	with_sha1=libnettle
-fi
-if test "x$with_sha1" = xlibnettle && test "x$HAVE_LIBNETTLE" != xyes; then
-	AC_MSG_ERROR([libnettle requested but not found])
-fi
-if test "x$with_sha1" = xlibnettle; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBNETTLE], [1],
-	          [Use libnettle SHA1 functions])
-	SHA1_LIBS=-lnettle
-fi
-AC_CHECK_LIB([gcrypt], [gcry_md_open], [HAVE_LIBGCRYPT=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBGCRYPT" = xyes; then
-	with_sha1=libgcrypt
-fi
-if test "x$with_sha1" = xlibgcrypt && test "x$HAVE_LIBGCRYPT" != xyes; then
-	AC_MSG_ERROR([libgcrypt requested but not found])
-fi
-if test "x$with_sha1" = xlibgcrypt; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBGCRYPT], [1],
-	          [Use libgcrypt SHA1 functions])
-	SHA1_LIBS=-lgcrypt
-fi
-# We don't need all of the OpenSSL libraries, just libcrypto
-AC_CHECK_LIB([crypto], [SHA1_Init], [HAVE_LIBCRYPTO=yes])
-PKG_CHECK_MODULES([OPENSSL], [openssl], [HAVE_OPENSSL_PKC=yes],
-                  [HAVE_OPENSSL_PKC=no])
-if test "x$HAVE_LIBCRYPTO" = xyes || test "x$HAVE_OPENSSL_PKC" = xyes; then
-	if test "x$with_sha1" = x; then
-		with_sha1=libcrypto
-	fi
-else
-	if test "x$with_sha1" = xlibcrypto; then
-		AC_MSG_ERROR([OpenSSL libcrypto requested but not found])
-	fi
-fi
-if test "x$with_sha1" = xlibcrypto; then
-	if test "x$HAVE_LIBCRYPTO" = xyes; then
-		SHA1_LIBS=-lcrypto
-	else
-		SHA1_LIBS="$OPENSSL_LIBS"
-		SHA1_CFLAGS="$OPENSSL_CFLAGS"
-	fi
-fi
-AC_MSG_CHECKING([for SHA1 implementation])
-AC_MSG_RESULT([$with_sha1])
-AC_SUBST(SHA1_LIBS)
-AC_SUBST(SHA1_CFLAGS)
-
-# Allow user to configure out the shader-cache feature
-AC_ARG_ENABLE([shader-cache],
-    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
-    [enable_shader_cache="$enableval"],
-    [if test "x$with_sha1" != "x"; then
-        enable_shader_cache=yes
-     else
-        enable_shader_cache=no
-     fi])
-if test "x$with_sha1" = "x"; then
-    if test "x$enable_shader_cache" = "xyes"; then
-        AC_MSG_ERROR([Cannot enable shader cache (no SHA-1 implementation found)])
-    fi
-fi
-AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
-
 case "$host_os" in
 linux*)
    need_pci_id=yes ;;
@@ -2484,12 +2345,6 @@ else
    echo "        Gallium:         no"
 fi

-dnl Shader cache
-echo ""
-echo "        Shader cache:    $enable_shader_cache"
-if test "x$enable_shader_cache" = "xyes"; then
-    echo "        With SHA1 from:  $with_sha1"
-fi

 dnl Libraries
 echo ""
--- a/docs/relnotes/11.0.0.html
+++ b/docs/relnotes/11.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 11.0.0 Release Notes / TBD</h1>
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>

 <p>
 Mesa 11.0.0 is a new development release.
@@ -83,13 +83,175 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
 </ul>

+
 <h2>Bug fixes</h2>

-TBD.
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error ndefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean&#64;vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fais</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings &gt;max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50&#64;execution&#64;geometry&#64;max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error &quot;Unsupported or undefined LONG_BIT&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+<li>Removed the EGL loader from the Linux SCons build.</li>

 </div>
 </body>
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -15,7 +15,6 @@ env.Append(CPPPATH = [

 # parse Makefile.sources
 egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES'))

 env.Append(CPPDEFINES = [
    '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \

 endif

-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+indices/u_indices_gen.c: indices/u_indices_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@

-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(AM_V_at)$(MKDIR_P) util
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@

+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@

 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
   else {
      double dscale = lp_const_scale(type);

-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
   }

   return elem;
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
                        unsigned bypass_usage,
                        uint64_t maximum_cache_size);

+/**
+ * Remove a buffer from the cache, but keep it alive.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);

 struct pb_fence_ops;

--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
 }


+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;
+   }
+   buf->mgr = NULL;
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (!mgr)
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
 * Actually destroy the buffer.
 */
 static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
   assert(!pipe_is_referenced(&buf->base.reference));
   pb_reference(&buf->buffer, NULL);
   FREE(buf);
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
   struct pb_cache_manager *mgr = buf->mgr;

+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
   pipe_mutex_lock(mgr->mutex);
   assert(!pipe_is_referenced(&buf->base.reference));
   
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -41,6 +41,7 @@
 #include "util/u_tile.h" 
 #include "util/u_prim.h"
 #include "util/u_surface.h"
+#include <inttypes.h>

 #include <stdio.h>
 #include <limits.h> /* CHAR_BIT */
@@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
      for (; flags->name; ++flags)
         namealign = MAX2(namealign, strlen(flags->name));
      for (flags = orig; flags->name; ++flags)
-         _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
+         _debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
                      (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
                      flags->desc ? " " : "", flags->desc ? flags->desc : "");
   }
@@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,

   if (debug_get_option_should_print()) {
      if (str) {
-         debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+         debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
      } else {
-         debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+         debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
      }
   }

--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -563,10 +563,29 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
+		/* TODO only use if prog doesn't use clipvertex/clipdist */
+		val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}

+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+		uint32_t planes = ctx->rasterizer->clip_plane_enable;
+		int count = 0;
+
+		while (planes && count < 6) {
+			int i = ffs(planes) - 1;
+
+			planes &= ~(1U << i);
+			fd_wfi(ctx, ring);
+			OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
+		}
+	}
+
 	/* NOTE: since primitive_restart is not actually part of any
 	 * state object, we need to make sure that we always emit
 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -162,10 +162,13 @@ enum a4xx_tex_fmt {
 	TFMT4_8_UNORM = 4,
 	TFMT4_8_8_UNORM = 14,
 	TFMT4_8_8_8_8_UNORM = 28,
+	TFMT4_8_SNORM = 5,
 	TFMT4_8_8_SNORM = 15,
 	TFMT4_8_8_8_8_SNORM = 29,
+	TFMT4_8_UINT = 6,
 	TFMT4_8_8_UINT = 16,
 	TFMT4_8_8_8_8_UINT = 30,
+	TFMT4_8_SINT = 7,
 	TFMT4_8_8_SINT = 17,
 	TFMT4_8_8_8_8_SINT = 31,
 	TFMT4_16_UINT = 21,
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -79,9 +79,9 @@ struct fd4_format {
 static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	/* 8-bit */
 	VT(R8_UNORM,   8_UNORM, R8_UNORM, WZYX),
-	V_(R8_SNORM,   8_SNORM, NONE,     WZYX),
-	V_(R8_UINT,    8_UINT,  NONE,     WZYX),
-	V_(R8_SINT,    8_SINT,  NONE,     WZYX),
+	VT(R8_SNORM,   8_SNORM, NONE,     WZYX),
+	VT(R8_UINT,    8_UINT,  NONE,     WZYX),
+	VT(R8_SINT,    8_SINT,  NONE,     WZYX),
 	V_(R8_USCALED, 8_UINT,  NONE,     WZYX),
 	V_(R8_SSCALED, 8_UINT,  NONE,     WZYX),

@@ -115,8 +115,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {

 	VT(R8G8_UNORM,   8_8_UNORM, R8G8_UNORM, WZYX),
 	VT(R8G8_SNORM,   8_8_SNORM, R8G8_SNORM, WZYX),
-	VT(R8G8_UINT,    8_8_UINT,  NONE,       WZYX),
-	VT(R8G8_SINT,    8_8_SINT,  NONE,       WZYX),
+	VT(R8G8_UINT,    8_8_UINT,  R8G8_UINT,  WZYX),
+	VT(R8G8_SINT,    8_8_SINT,  R8G8_SINT,  WZYX),
 	V_(R8G8_USCALED, 8_8_UINT,  NONE,       WZYX),
 	V_(R8G8_SSCALED, 8_8_SINT,  NONE,       WZYX),

--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -334,6 +334,7 @@ struct fd_context {
 		FD_DIRTY_INDEXBUF    = (1 << 16),
 		FD_DIRTY_SCISSOR     = (1 << 17),
 		FD_DIRTY_STREAMOUT   = (1 << 18),
+		FD_DIRTY_UCP         = (1 << 19),
 	} dirty;

 	struct pipe_blend_state *blend;
@@ -355,6 +356,7 @@ struct fd_context {
 	struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
 	struct pipe_index_buffer indexbuf;
 	struct fd_streamout_stateobj streamout;
+	struct pipe_clip_state ucp;

 	/* GMEM/tile handling fxns: */
 	void (*emit_tile_init)(struct fd_context *ctx);
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -65,7 +65,9 @@ static void
 fd_set_clip_state(struct pipe_context *pctx,
 		const struct pipe_clip_state *clip)
 {
-	DBG("TODO: ");
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->ucp = *clip;
+	ctx->dirty |= FD_DIRTY_UCP;
 }

 static void
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -884,7 +884,7 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
   defId(i->def(0), 2);
   srcAddr8(i->src(0), 16);

-   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+   if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
      code[0] |= 1 << 8;
   } else {
      if (i->op == OP_PINTERP) {
@@ -896,10 +896,11 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
   }

   if (i->encSize == 8) {
-      code[1] =
-         (code[0] & (3 << 24)) >> (24 - 16) |
-         (code[0] & (1 <<  8)) << (18 -  8);
-      code[0] &= ~0x03000100;
+      if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
+         code[1] = 4 << 16;
+      else
+         code[1] = (code[0] & (3 << 24)) >> (24 - 16);
+      code[0] &= ~0x03000000;
      code[0] |= 1;
      emitFlagsRd(i);
   }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -202,7 +202,10 @@ NV50LegalizePostRA::visit(Function *fn)
   Program *prog = fn->getProgram();

   r63 = new_LValue(fn, FILE_GPR);
-   r63->reg.data.id = 63;
+   if (prog->maxGPR < 63)
+      r63->reg.data.id = 63;
+   else
+      r63->reg.data.id = 127;

   // this is actually per-program, but we can do it all on visiting main()
   std::list<Instruction *> *outWrites =
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2602,6 +2602,10 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
             !isFloatType(i->dType))
            break;

+         if (i->getDef(0)->reg.data.id >= 64 ||
+             i->getSrc(0)->reg.data.id >= 64)
+            break;
+
         def = i->getSrc(1)->getInsn();
         if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
            vtmp = i->getSrc(1);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -411,7 +411,7 @@ int ImmediateValue::print(char *buf, size_t size, DataType ty) const
   case TYPE_U64:
   case TYPE_S64:
   default:
-      PRINT("0x%016"PRIx64, reg.data.u64);
+      PRINT("0x%016" PRIx64, reg.data.u64);
      break;
   }
   return pos;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,6 +25,7 @@

 #include <stack>
 #include <limits>
+#include <tr1/unordered_map>

 namespace nv50_ir {

@@ -222,6 +223,7 @@ private:
   private:
      virtual bool visit(BasicBlock *);
      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+      inline void splitEdges(BasicBlock *b);
   };

   class ArgumentMovesPass : public Pass {
@@ -345,28 +347,55 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
   return (n == 2);
 }

-// For each operand of each PHI in b, generate a new value by inserting a MOV
-// at the end of the block it is coming from and replace the operand with its
-// result. This eliminates liveness conflicts and enables us to let values be
-// copied to the right register if such a conflict exists nonetheless.
+struct PhiMapHash {
+   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
+      return std::tr1::hash<Instruction*>()(val.first) * 31 +
+         std::tr1::hash<BasicBlock*>()(val.second);
+   }
+};
+
+typedef std::tr1::unordered_map<
+   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
+
+// Critical edges need to be split up so that work can be inserted along
+// specific edge transitions. Unfortunately manipulating incident edges into a
+// BB invalidates all the PHI nodes since their sources are implicitly ordered
+// by incident edge order.
 //
-// These MOVs are also crucial in making sure the live intervals of phi srces
-// are extended until the end of the loop, since they are not included in the
-// live-in sets.
-bool
-RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+// TODO: Make it so that that is not the case, and PHI nodes store pointers to
+// the original BBs.
+void
+RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
 {
-   Instruction *phi, *mov;
   BasicBlock *pb, *pn;
-
+   Instruction *phi;
+   Graph::EdgeIterator ei;
   std::stack<BasicBlock *> stack;
+   int j = 0;

-   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      assert(pb);
      if (needNewElseBlock(bb, pb))
         stack.push(pb);
   }
+
+   // No critical edges were found, no need to perform any work.
+   if (stack.empty())
+      return;
+
+   // We're about to, potentially, reorder the inbound edges. This means that
+   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
+   // nodes after the graph has been modified.
+   PhiMap phis;
+
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
+         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
+   }
+
   while (!stack.empty()) {
      pb = stack.top();
      pn = new BasicBlock(func);
@@ -379,12 +408,47 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
      assert(pb->getExit()->op != OP_CALL);
      if (pb->getExit()->asFlow()->target.bb == bb)
         pb->getExit()->asFlow()->target.bb = pn;
+
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
+         phis.erase(it);
+      }
   }

+   // Now go through and fix up all of the phi node sources.
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+
+         phi->setSrc(j, it->second);
+      }
+   }
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi srces
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+   Instruction *phi, *mov;
+
+   splitEdges(bb);
+
   // insert MOVs (phi->src(j) should stem from j-th in-BB)
   int j = 0;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
-      pb = BasicBlock::get(ei.getNode());
+      BasicBlock *pb = BasicBlock::get(ei.getNode());
      if (!pb->isTerminated())
         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));

--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
-   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
-      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+   if (nv->push_cb && can_cb)
+      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
@@ -532,8 +532,13 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
-      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
-         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+         if (tx->map)
+            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+         util_range_add(&buf->valid_buffer_range,
+                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
+      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
@@ -541,9 +546,6 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
-
-      util_range_add(&buf->valid_buffer_range,
-                     tx->base.box.x, tx->base.box.x + tx->base.box.width);
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -41,6 +41,8 @@ struct nv04_resource {
   uint8_t status;
   uint8_t domain;

+   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
   struct nouveau_fence *fence;
   struct nouveau_fence *fence_wr;

--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -6,6 +6,8 @@

 #define NOUVEAU_MAX_SCRATCH_BUFS 4

+struct nv04_resource;
+
 struct nouveau_context {
   struct pipe_context pipe;
   struct nouveau_screen *screen;
@@ -23,8 +25,7 @@ struct nouveau_context {
                     unsigned, const void *);
   /* base, size refer to the whole constant buffer */
   void (*push_cb)(struct nouveau_context *,
-                   struct nouveau_bo *, unsigned domain,
-                   unsigned base, unsigned size,
+                   struct nv04_resource *,
                   unsigned offset, unsigned words, const uint32_t *);

   /* @return: @ref reduced by nr of references found in context */
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -28,6 +28,7 @@
 #include "util/u_surface.h"

 #include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
 #include "nv30/nv30_screen.h"
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_resource.h"
@@ -144,21 +145,54 @@ nv30_resource_copy_region(struct pipe_context *pipe,
   nv30_transfer_rect(nv30, NEAREST, &src, &dst);
 }

-void
-nv30_resource_resolve(struct pipe_context *pipe,
-                      const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+                      const struct pipe_blit_info *info)
 {
-#if 0
-   struct nv30_context *nv30 = nv30_context(pipe);
+   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
   struct nv30_rect src, dst;
+   unsigned x, x0, x1, y, y1, w, h;

-   define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
-               info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
-   define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
-               info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
+   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+      info->src.box.y, info->src.box.width, info->src.box.height, &src);
+   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);

-   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
+   x0 = src.x0;
+   x1 = src.x1;
+   y1 = src.y1;
+
+   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   for (y = src.y0; y < y1; y += h) {
+      h = y1 - y;
+      if (h > 1024)
+         h = 1024;
+
+      src.y0 = 0;
+      src.y1 = h;
+      src.h = h;
+
+      dst.y1 = dst.y0 + (h >> src_mt->ms_y);
+      dst.h = h >> src_mt->ms_y;
+
+      for (x = x0; x < x1; x += w) {
+         w = x1 - x;
+         if (w > 1024)
+            w = 1024;
+
+         src.offset = y * src.pitch + x * src.cpp;
+         src.x0 = 0;
+         src.x1 = w;
+         src.w = w;
+
+         dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+                      (x >> src_mt->ms_x) * dst.cpp;
+         dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+         dst.w = w >> src_mt->ms_x;
+
+         nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+      }
+   }
 }

 void
@@ -172,7 +206,7 @@ nv30_blit(struct pipe_context *pipe,
       info.dst.resource->nr_samples <= 1 &&
       !util_format_is_depth_or_stencil(info.src.resource->format) &&
       !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("nv30: color resolve unimplemented\n");
+      nv30_resource_resolve(nv30, blit_info);
      return;
   }

@@ -362,6 +396,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
   blocksz = util_format_get_blocksize(pt->format);

   if ((pt->target == PIPE_TEXTURE_RECT) ||
+       (pt->bind & PIPE_BIND_SCANOUT) ||
       !util_is_power_of_two(pt->width0) ||
       !util_is_power_of_two(pt->height0) ||
       !util_is_power_of_two(pt->depth0) ||
@@ -369,6 +404,14 @@ nv30_miptree_create(struct pipe_screen *pscreen,
       util_format_is_float(pt->format) || mt->ms_mode) {
      mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
      mt->uniform_pitch = align(mt->uniform_pitch, 64);
+      if (pt->bind & PIPE_BIND_SCANOUT) {
+         struct nv30_screen *screen = nv30_screen(pscreen);
+         int pitch_align = MAX2(
+               screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+               /* round_down_pow2(mt->uniform_pitch / 4) */
+               1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+         mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+      }
   }

   if (!mt->uniform_pitch)
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
@@ -65,9 +65,6 @@ nv30_resource_copy_region(struct pipe_context *pipe,
                          struct pipe_resource *src, unsigned src_level,
                          const struct pipe_box *src_box);

-void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
 void
 nv30_blit(struct pipe_context *pipe,
          const struct pipe_blit_info *blit_info);
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -319,8 +319,9 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
                                unsigned sample_count,
                                unsigned bindings)
 {
-   if (sample_count > 4)
+   if (sample_count > nv30_screen(pscreen)->max_sample_count)
      return false;
+
   if (!(0x00000017 & (1 << sample_count)))
      return false;

@@ -450,6 +451,23 @@ nv30_screen_create(struct nouveau_device *dev)
      return NULL;
   }

+   /*
+    * Some modern apps try to use msaa without keeping in mind the
+    * restrictions on videomem of older cards. Resulting in dmesg saying:
+    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+    *
+    * Because we are running out of video memory, after which the program
+    * using the msaa visual freezes, and eventually the entire system freezes.
+    *
+    * To work around this we do not allow msaa visauls by default and allow
+    * the user to override this via NV30_MAX_MSAA.
+    */
+   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (screen->max_sample_count > 4)
+      screen->max_sample_count = 4;
+
   pscreen = &screen->base.base;
   pscreen->destroy = nv30_screen_destroy;
   pscreen->get_param = nv30_screen_get_param;
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
@@ -38,6 +38,8 @@ struct nv30_screen {
   /*XXX: nvfx state */
   struct nouveau_heap *vp_exec_heap;
   struct nouveau_heap *vp_data_heap;
+
+   unsigned max_sample_count;
 };

 static inline struct nv30_screen *
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -371,7 +371,7 @@ nv30_transfer_rect_blit(XFER_ARGS)
 static bool
 nv30_transfer_sifm(XFER_ARGS)
 {
-   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
+   if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
      return false;

   if (src->d > 1 || dst->d > 1)
@@ -381,7 +381,7 @@ nv30_transfer_sifm(XFER_ARGS)
      return false;

   if (!dst->pitch) {
-      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
+      if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
         return false;
   } else {
      if (dst->domain != NOUVEAU_BO_VRAM)
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -199,9 +199,13 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
         }
      }

-      if (nv50->idxbuf.buffer == res)
+      if (nv50->idxbuf.buffer == res) {
+         /* Just rebind to the bufctx as there is no separate dirty bit */
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+         BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
         if (!--ref)
            return ref;
+      }

      for (s = 0; s < 3; ++s) {
      assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -197,7 +197,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);

 /* nv50_query.c */
 void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
                               struct pipe_query *, unsigned result_offset);
 void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
 void nva0_so_target_save_offset(struct pipe_context *,
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -203,8 +203,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
   F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
   C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
   F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
   C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
   F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
   F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),

   C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
      case TGSI_SEMANTIC_VERTEXID:
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+         prog->vp.vertexid = 1;
         continue;
      default:
         break;
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,6 +76,7 @@ struct nv50_program {
      ubyte psiz;        /* output slot of point size */
      ubyte bfc[2];      /* indices into varying for FFC (FP) or BFC (VP) */
      ubyte edgeflag;
+      ubyte vertexid;
      ubyte clpd[2];     /* output slot of clip distance[i]'s 1st component */
      ubyte clpd_nr;
   } vp;
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -266,6 +266,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
      nv50_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      q->sequence++;
      nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -451,18 +452,18 @@ nv50_render_condition(struct pipe_context *pipe,
 }

 void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                          struct pipe_query *pq, unsigned result_offset)
 {
   struct nv50_query *q = nv50_query(pq);

-   /* XXX: does this exist ? */
-#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+   nv50_query_update(q);
+   if (q->state != NV50_QUERY_STATE_READY)
+      nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
+   q->state = NV50_QUERY_STATE_READY;

-   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
-   nouveau_pushbuf_space(push, 0, 0, 1);
-   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
-                        NV50_IB_ENTRY_1_NO_PREFETCH);
+   BEGIN_NV04(push, SUBC_3D(method), 1);
+   PUSH_DATA (push, q->data[result_offset / 4]);
 }

 void
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -641,12 +641,12 @@ nv50_stream_output_validate(struct nv50_context *nv50)
      PUSH_DATA (push, so->num_attribs[i]);
      if (n == 4) {
         PUSH_DATA(push, targ->pipe.buffer_size);
-
-         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
         if (!targ->clean) {
            assert(targ->pq);
-            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+            nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+                                      targ->pq, 0x4);
         } else {
+            BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
            PUSH_DATA(push, 0);
            targ->clean = false;
         }
@@ -655,6 +655,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
            (so->stride[i] * nv50->state.prim_size);
         prims = MIN2(prims, limit);
      }
+      targ->stride = so->stride[i];
      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
   }
   if (prims != ~0) {
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -117,7 +117,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
   struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
   int i;
   bool emit_common_func = cso->rt[0].blend_enable;
-   uint32_t ms;

   if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
      SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
@@ -189,15 +188,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
      SB_DATA    (so, nv50_colormask(cso->rt[0].colormask));
   }

-   ms = 0;
-   if (cso->alpha_to_coverage)
-      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
-   if (cso->alpha_to_one)
-      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
-
-   SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
-   SB_DATA    (so, ms);
-
   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
   return so;
 }
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -1,4 +1,6 @@

+#include "util/u_format.h"
+
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_defs.xml.h"

@@ -313,6 +315,25 @@ nv50_validate_derived_2(struct nv50_context *nv50)
   }
 }

+static void
+nv50_validate_derived_3(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+   uint32_t ms = 0;
+
+   if ((!fb->nr_cbufs || !fb->cbufs[0] ||
+        !util_format_is_pure_integer(fb->cbufs[0]->format)) && nv50->blend) {
+      if (nv50->blend->pipe.alpha_to_coverage)
+         ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+      if (nv50->blend->pipe.alpha_to_one)
+         ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+   }
+
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
+   PUSH_DATA (push, ms);
+}
+
 static void
 nv50_validate_clip(struct nv50_context *nv50)
 {
@@ -474,6 +495,7 @@ static struct state_validate {
    { nv50_validate_derived_rs,    NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
    { nv50_validate_derived_2,     NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
+    { nv50_validate_derived_3,     NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
    { nv50_validate_clip,          NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
    { nv50_constbufs_validate,     NV50_NEW_CONSTBUF },
@@ -481,7 +503,8 @@ static struct state_validate {
    { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
    { nv50_stream_output_validate, NV50_NEW_STRMOUT |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+                                   NV50_NEW_VERTPROG },
    { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -19,7 +19,7 @@
 struct nv50_blend_stateobj {
   struct pipe_blend_state pipe;
   int size;
-   uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
+   uint32_t state[82]; // TODO: allocate less if !independent_blend_enable
 };

 struct nv50_rasterizer_stateobj {
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
      return NV50_SURFACE_FORMAT_R16_UNORM;
   case 4:
      return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+   case 8:
+      return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+   case 16:
+      return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
   default:
      return 0;
   }
@@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
   /* zsa state */
   BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
+   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -293,7 +293,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
   uint64_t addrs[PIPE_MAX_ATTRIBS];
   uint32_t limits[PIPE_MAX_ATTRIBS];
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   struct nv50_vertex_stateobj *vertex = nv50->vertex;
+   struct nv50_vertex_stateobj dummy = {};
+   struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
   struct pipe_vertex_buffer *vb;
   struct nv50_vertex_element *ve;
   uint32_t mask;
@@ -301,6 +302,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
   unsigned i;
   const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);

+   /* A vertexid is not generated for inline data uploads. Have to use a
+    * VBO. This check must come after the vertprog has been validated,
+    * otherwise vertexid may be unset.
+    */
+   assert(nv50->vertprog->translated);
+   if (nv50->vertprog->vp.vertexid)
+      nv50->vbo_push_hint = 0;
+
   if (unlikely(vertex->need_conversion))
      nv50->vbo_fifo = ~0;
   else
@@ -317,7 +326,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
         if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
            nv50->base.vbo_dirty = true;
-            break;
         }
      }
   }
@@ -736,9 +744,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
-      PUSH_DATA (push, 0);
-      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
-      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      PUSH_DATA (push, so->stride);
+      nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (push, 0);

@@ -838,10 +845,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
         nv50->base.vbo_dirty = true;
   }

-   if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
-       nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv50->base.vbo_dirty = true;
-
   if (nv50->base.vbo_dirty) {
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
      PUSH_DATA (push, 0);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
                      struct nouveau_bo *dst, unsigned offset, unsigned domain,
                      unsigned size, const void *data);
 void
-nvc0_cb_push(struct nouveau_context *,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
-             unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data);

 /* nvc0_vbo.c */
 void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -449,7 +449,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
-         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+         fp->hdr[18] |= 0xf << info->out[i].slot[0];
   }

   fp->fp.early_z = info->prop.fp.earlyFragTests;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
   }
   nvc0->constbuf_dirty[s] |= 1 << i;

+   if (nvc0->constbuf[s][i].u.buf)
+      nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
   pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);

   nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (0 << 4) | 1);
            }
-            nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, nvc0->state.uniform_buffer_bound[s],
                         0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
@@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
               BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
+               res->cb_bindings[s] |= 1 << i;
            } else {
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (i << 4) | 0);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)

   /* zsa state */
   IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+   IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
   IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
   IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);

--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -506,11 +506,48 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
 }

 /* This happens rather often with DTD9/st. */
-void
+static void
 nvc0_cb_push(struct nouveau_context *nv,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
+             struct nv04_resource *res,
             unsigned offset, unsigned words, const uint32_t *data)
+{
+   struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+   struct nvc0_constbuf *cb = NULL;
+   int s;
+
+   /* Go through all the constbuf binding points of this buffer and try to
+    * find one which contains the region to be updated.
+    */
+   for (s = 0; s < 6 && !cb; s++) {
+      uint16_t bindings = res->cb_bindings[s];
+      while (bindings) {
+         int i = ffs(bindings) - 1;
+         uint32_t cb_offset = nvc0->constbuf[s][i].offset;
+
+         bindings &= ~(1 << i);
+         if (cb_offset <= offset &&
+             cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) {
+            cb = &nvc0->constbuf[s][i];
+            break;
+         }
+      }
+   }
+
+   if (cb) {
+      nvc0_cb_bo_push(nv, res->bo, res->domain,
+                      res->offset + cb->offset, cb->size,
+                      offset - cb->offset, words, data);
+   } else {
+      nv->push_data(nv, res->bo, res->offset + offset, res->domain,
+                    words * 4, data);
+   }
+}
+
+void
+nvc0_cb_bo_push(struct nouveau_context *nv,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data)
 {
   struct nouveau_pushbuf *push = nv->pushbuf;

@@ -520,6 +557,9 @@ nvc0_cb_push(struct nouveau_context *nv,
   assert(!(offset & 3));
   size = align(size, 0x100);

+   assert(offset < size);
+   assert(offset + words * 4 <= size);
+
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, size);
   PUSH_DATAh(push, bo->offset + base);
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1853,7 +1853,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (resource_offset + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_030008_STRIDE(vb->stride) |
@@ -1923,7 +1923,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 					fprintf(stderr, "CND:%X ", cf->cond);
 				if (cf->pop_count)
 					fprintf(stderr, "POP:%X ", cf->pop_count);
+				if (cf->end_of_program)
+					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			}
 		}
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -936,28 +936,5 @@ static inline bool r600_can_read_depth(struct r600_texture *rtex)
 #define     V_028A6C_OUTPRIM_TYPE_LINESTRIP            1
 #define     V_028A6C_OUTPRIM_TYPE_TRISTRIP             2

-static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
-{
-	static const int prim_conv[] = {
-		V_028A6C_OUTPRIM_TYPE_POINTLIST,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP
-	};
-	assert(mode < Elements(prim_conv));
-
-	return prim_conv[mode];
-}
-
+unsigned r600_conv_prim_to_gs_out(unsigned mode);
 #endif
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -141,7 +141,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
 	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
 	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
-	unsigned export_shader = key.vs.as_es;
+	unsigned export_shader;

 	shader->shader.bc.isa = rctx->isa;

@@ -220,6 +220,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		}
 		break;
 	case TGSI_PROCESSOR_VERTEX:
+		export_shader = key.vs.as_es;
 		if (rctx->b.chip_class >= EVERGREEN) {
 			if (export_shader)
 				evergreen_update_es_state(ctx, shader);
@@ -1830,8 +1831,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.shader = shader;
 	ctx.native_integers = true;

-	shader->vs_as_gs_a = key.vs.as_gs_a;
-	shader->vs_as_es = key.vs.as_es;

 	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
 			   rscreen->has_compressed_msaa_texturing);
@@ -1844,9 +1843,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->processor_type = ctx.type;
 	ctx.bc->type = shader->processor_type;

-	ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+		shader->vs_as_gs_a = key.vs.as_gs_a;
+		shader->vs_as_es = key.vs.as_es;
+	}

-	if (key.vs.as_es) {
+	ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
+
+	if (shader->vs_as_es) {
 		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
 	} else {
 		ctx.gs_for_vs = NULL;
@@ -1866,7 +1870,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->nr_ps_color_exports = 0;
 	shader->nr_ps_max_color_exports = 0;

-	shader->two_side = key.ps.color_two_side;
+	if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+		shader->two_side = key.ps.color_two_side;

 	/* register allocations */
 	/* Values [0,127] correspond to GPR[0..127].
@@ -2270,7 +2275,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	convert_edgeflag_to_int(&ctx);

 	if (ring_outputs) {
-		if (key.vs.as_es)
+		if (shader->vs_as_es)
 			emit_gs_ring_writes(&ctx, FALSE);
 	} else {
 		/* Export output */
@@ -6151,10 +6156,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
 		if (r)
 			return r;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1007,7 +1007,7 @@ static void r600_init_color_surface(struct r600_context *rctx,

 		/* CMASK. */
 		if (!rctx->dummy_cmask ||
-		    rctx->dummy_cmask->buf->size < cmask.size ||
+		    rctx->dummy_cmask->b.b.width0 < cmask.size ||
 		    rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
 			struct pipe_transfer *transfer;
 			void *ptr;
@@ -1025,7 +1025,7 @@ static void r600_init_color_surface(struct r600_context *rctx,

 		/* FMASK. */
 		if (!rctx->dummy_fmask ||
-		    rctx->dummy_fmask->buf->size < fmask.size ||
+		    rctx->dummy_fmask->b.b.width0 < fmask.size ||
 		    rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
 			pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
 			rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
@@ -1694,7 +1694,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (320 + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_038008_STRIDE(vb->stride));
@@ -1743,7 +1743,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
@@ -2051,7 +2051,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
 			/* always privilege vs stage so that at worst we have the
 			 * pixel stage producing wrong output (not the vertex
 			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
 			new_num_vs_gprs = num_vs_gprs;
 			new_num_gs_gprs = num_gs_gprs;
 			new_num_es_gprs = num_es_gprs;
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -123,6 +123,31 @@ static unsigned r600_conv_pipe_prim(unsigned prim)
 	return prim_conv[prim];
 }

+unsigned r600_conv_prim_to_gs_out(unsigned mode)
+{
+	static const int prim_conv[] = {
+		[PIPE_PRIM_POINTS]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		[PIPE_PRIM_LINES]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_LOOP]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_STRIP]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_TRIANGLES]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_STRIP]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_FAN]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_QUADS]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_QUAD_STRIP]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_POLYGON]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_LINES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_PATCHES]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		[R600_PRIM_RECTANGLE_LIST]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP
+	};
+	assert(mode < Elements(prim_conv));
+
+	return prim_conv[mode];
+}
+
 /* common state between evergreen and r600 */

 static void r600_bind_blend_state_internal(struct r600_context *rctx,
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
 	int r = 0;
 	uint32_t dw0 = dw[i];
 	uint32_t dw1 = dw[i+1];
+	assert(i+1 <= ndw);

 	if ((dw1 >> 29) & 1) { // CF_ALU
 		return decode_cf_alu(i, bc);
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
 		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
 		cf_node *if_pop = sh.create_cf(CF_OP_POP);

+		if (!last_cf || last_cf->get_parent_region() == r) {
+			last_cf = if_pop;
+		}
 		if_pop->bc.pop_count = 1;
 		if_pop->jump_after(if_pop);

--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
 		if ((r = decode_cf(i, eop)))
 			return r;

-	} while (!eop || (i >> 1) <= max_cf);
+	} while (!eop || (i >> 1) < max_cf);

 	return 0;
 }
@@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
 }

 int bc_parser::prepare_loop(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());

 	cf_node *end = cf_map[c->bc.addr - 1];
 	assert(end->bc.op == CF_OP_LOOP_END);
@@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
 }

 int bc_parser::prepare_if(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());
 	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

+	if (!end)
+		return 0; // not quite sure how this happens, malformed input?
+
 	BCP_DUMP(
 		sblog << "parsing JUMP @" << c->bc.id;
 		sblog << "\n";
@@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
 	if (c_else->parent != c->parent)
 		c_else = NULL;

-	if (end->parent != c->parent)
+	if (end && end->parent != c->parent)
 		end = NULL;

 	region_node *reg = sh->create_region();
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {

 	for (i = 0; i < nsrc; ++i) {
 		value *v = n->src[i];
-		if (v->is_readonly())
+		if (v->is_readonly() || v->is_undef())
 			continue;
 		if (i == 1 && opt)
 			continue;
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -197,7 +197,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -206,13 +206,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -220,7 +220,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
@@ -254,7 +254,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -264,7 +264,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += query->buffer.results_end + query->result_size/2;
@@ -273,7 +273,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -282,7 +282,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -33,14 +33,6 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 	int i;

-	/* If the CS is sufficiently large, don't count the space needed
-	 * and just flush if there is less than 8096 dwords left. */
-	if (cs->max_dw >= 24 * 1024) {
-		if (cs->cdw > cs->max_dw - 8 * 1024)
-			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-		return;
-	}
-
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_reloc) and two counters in the pipe
 	 * driver for those that haven't been added yet.
@@ -54,6 +46,15 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	ctx->b.gtt = 0;
 	ctx->b.vram = 0;

+	/* If the CS is sufficiently large, don't count the space needed
+	 * and just flush if there is less than 8096 dwords left.
+	 */
+	if (cs->max_dw >= 24 * 1024) {
+		if (cs->cdw > cs->max_dw - 8 * 1024)
+			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		return;
+	}
+
 	/* The number of dwords we already used in the CS so far. */
 	num_dw += cs->cdw;

--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -195,9 +195,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 	r600_target = radeon_llvm_get_r600_target(triple);
 	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
 					   r600_get_llvm_processor_name(sscreen->b.family),
-					   sctx->b.chip_class >= VI ?
-						   "+DumpCode" :
-						   "+DumpCode,+vgpr-spilling",
+					   "+DumpCode,+vgpr-spilling",
 					   LLVMCodeGenLevelDefault,
 					   LLVMRelocDefault,
 					   LLVMCodeModelDefault);
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -190,6 +190,7 @@ struct si_shader_selector {
 	uint64_t	inputs_read;
 	uint64_t	outputs_written;
 	uint32_t	patch_outputs_written;
+	uint32_t	ps_colors_written;
 };

 /* Valid shader configurations:
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -29,6 +29,7 @@
 #include "sid.h"
 #include "radeon/r600_cs.h"

+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
@@ -233,8 +234,10 @@ static unsigned si_pack_float_12p4(float x)
 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
 *   so COLOR1 is enabled pretty much all the time.
 * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
 */
-static void si_update_fb_blend_state(struct si_context *sctx)
+void si_update_fb_blend_state(struct si_context *sctx)
 {
 	struct si_pm4_state *pm4;
 	struct si_state_blend *blend = sctx->queued.named.blend;
@@ -252,6 +255,16 @@ static void si_update_fb_blend_state(struct si_context *sctx)
 			mask |= 0xf << (4*i);
 	mask &= blend->cb_target_mask;

+	/* Avoid a hang that happens when dual source blending is enabled
+	 * but there is not enough color outputs. This is undefined behavior,
+	 * so disable color writes completely.
+	 *
+	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
+	 */
+	if (blend->dual_src_blend &&
+	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+		mask = 0;
+
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 	si_pm4_set_state(sctx, fb_blend, pm4);
 }
@@ -343,6 +356,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		return NULL;

 	blend->alpha_to_one = state->alpha_to_one;
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);

 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
@@ -3166,6 +3180,7 @@ static void si_init_config(struct si_context *sctx)
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	int i;

 	if (pm4 == NULL)
 		return;
@@ -3196,6 +3211,11 @@ static void si_init_config(struct si_context *sctx)

 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

+	for (i = 0; i < 16; i++) {
+		si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+		si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+	}
+
 	switch (sctx->screen->b.family) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
@@ -3282,8 +3302,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
 	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
 	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
-	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
-	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
 	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
 	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,6 +39,7 @@ struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };

 struct si_state_sample_mask {
@@ -251,6 +252,7 @@ void si_shader_change_notify(struct si_context *sctx);
 /* si_state.c */
 struct si_shader_selector;

+void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                               enum pipe_format format,
                               enum pipe_texture_target target,
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -713,6 +713,15 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 			}
 		}
 		break;
+	case PIPE_SHADER_FRAGMENT:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			if (name == TGSI_SEMANTIC_COLOR)
+				sel->ps_colors_written |= 1 << index;
+		}
+		break;
 	}

 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -840,6 +849,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	}

 	sctx->ps_shader = sel;
+	si_update_fb_blend_state(sctx);
 }

 static void si_delete_shader_selector(struct pipe_context *ctx,
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -314,6 +314,7 @@ qir_get_temp(struct vc4_compile *c)

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;
+        reg.pack = 0;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -465,7 +465,7 @@ namespace {
            const bool is_write_only = access_qual == "write_only";
            const bool is_read_only = access_qual == "read_only";

-            typename module::argument::type marg_type;
+            enum module::argument::type marg_type;
            if (is_image2d && is_read_only) {
               marg_type = module::argument::image2d_rd;
            } else if (is_image2d && is_write_only) {
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -684,6 +684,9 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
   enum amdgpu_bo_handle_type type;
   int r;

+   if ((void*)bo != (void*)buffer)
+      pb_cache_manager_remove_buffer(buffer);
+
   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -110,7 +110,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   struct amdgpu_heap_info vram, gtt;
   struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
   uint32_t vce_version = 0, vce_feature = 0;
-   int r;
+   int r, i, j;

   /* Query hardware and driver information. */
   r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
@@ -248,7 +248,6 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   ws->info.vram_size = vram.heap_size;
   /* convert the shader clock from KHz to MHz */
   ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
-   ws->info.max_compute_units = 1; /* TODO */
   ws->info.max_se = ws->amdinfo.num_shader_engines;
   ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
   ws->info.has_uvd = uvd.available_rings != 0;
@@ -263,6 +262,18 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   ws->info.r600_virtual_address = TRUE;
   ws->info.r600_has_dma = dma.available_rings != 0;

+   /* Guess what the maximum compute unit number is by looking at the mask
+    * of enabled CUs.
+    */
+   for (i = 0; i < ws->info.max_se; i++)
+      for (j = 0; j < ws->info.max_sh_per_se; j++) {
+         unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
+
+         if (ws->info.max_compute_units < max)
+            ws->info.max_compute_units = max;
+      }
+   ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+
   memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
          sizeof(ws->amdinfo.gb_tile_mode));
   ws->info.si_tile_mode_array_valid = TRUE;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -1126,6 +1126,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,

    memset(&flink, 0, sizeof(flink));

+    if ((void*)bo != (void*)buffer)
+       pb_cache_manager_remove_buffer(buffer);
+
    if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
        if (!bo->flink_name) {
            flink.handle = bo->handle;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -97,22 +97,17 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
 {
    int i;

-    csc->buf = MALLOC(ws->ib_max_size);
-    if (!csc->buf)
-        return FALSE;
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
-        FREE(csc->buf);
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
-        FREE(csc->buf);
        FREE(csc->relocs_bo);
        return FALSE;
    }
@@ -165,7 +160,6 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
-    FREE(csc->buf);
 }


@@ -206,7 +200,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;
-    cs->base.max_dw = ws->ib_max_size / 4;
+    cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -30,7 +30,7 @@
 #include "radeon_drm_bo.h"

 struct radeon_cs_context {
-    uint32_t                    *buf;
+    uint32_t                    buf[16 * 1024];

    int                         fd;
    struct drm_radeon_cs        cs;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -395,20 +395,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
        }

        ws->info.r600_virtual_address = FALSE;
-        ws->ib_max_size = 64 * 1024;
-
        if (ws->info.drm_minor >= 13) {
+            uint32_t ib_vm_max_size;
+
            ws->info.r600_virtual_address = TRUE;
            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
                                      &ws->va_start))
                ws->info.r600_virtual_address = FALSE;
-
-            if (radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
-                                     &ws->ib_max_size))
-                ws->ib_max_size *= 4; /* the kernel returns the size in dwords */
-            else
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+                                      &ib_vm_max_size))
                ws->info.r600_virtual_address = FALSE;
-
            radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
                                 &ws->va_unmap_working);
        }
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -73,7 +73,6 @@ struct radeon_drm_winsys {

    enum radeon_generation gen;
    struct radeon_info info;
-    uint32_t ib_max_size;
    uint32_t va_start;
    uint32_t va_unmap_working;
    uint32_t accel_working2;
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -29,18 +29,7 @@ endif

 intermediates := $(call local-generated-sources-dir)

-sources := \
-	glsl_lexer.cpp \
-	glsl_parser.cpp \
-	glcpp/glcpp-lex.c \
-	glcpp/glcpp-parse.c \
-	nir/nir_builder_opcodes.h \
-	nir/nir_constant_expressions.c \
-	nir/nir_opcodes.c \
-	nir/nir_opcodes.h \
-	nir/nir_opt_algebraic.c
-
-LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)

 LOCAL_C_INCLUDES += \
 	$(intermediates)/glcpp \
@@ -51,8 +40,10 @@ LOCAL_C_INCLUDES += \
 LOCAL_EXPORT_C_INCLUDE_DIRS += \
 	$(intermediates)/nir

-sources := $(addprefix $(intermediates)/, $(sources))
-LOCAL_GENERATED_SOURCES += $(sources)
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+	$(LIBGLCPP_GENERATED_FILES) \
+	$(NIR_GENERATED_FILES) \
+	$(LIBGLSL_GENERATED_CXX_FILES))

 define local-l-or-ll-to-c-or-cpp
 	@mkdir -p $(dir $@)
@@ -102,8 +93,7 @@ $(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps)
 nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
 nir_constant_expressions_deps := \
 	$(LOCAL_PATH)/nir/nir_opcodes.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.h
+	$(LOCAL_PATH)/nir/nir_constant_expressions.py

 $(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
 	@mkdir -p $(dir $@)
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -140,13 +140,16 @@ libglsl_la_SOURCES =					\
 	glsl_parser.cpp					\
 	glsl_parser.h					\
 	$(LIBGLSL_FILES)				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)
+

 libnir_la_SOURCES =					\
 	glsl_types.cpp					\
 	builtin_types.cpp				\
 	glsl_symbol_table.cpp				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)

 glsl_compiler_SOURCES = \
 	$(GLSL_COMPILER_CXX_FILES)
@@ -197,19 +200,23 @@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
 am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =

+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+	$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy

 glsl_lexer.cpp: glsl_lexer.ll
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll

 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y

 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l

 # Only the parsers (specifically the header files generated at the same time)
 # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@ -222,11 +229,7 @@ BUILT_SOURCES =						\
 	glsl_lexer.cpp					\
 	glcpp/glcpp-parse.c				\
 	glcpp/glcpp-lex.c				\
-	nir/nir_builder_opcodes.h				\
-	nir/nir_constant_expressions.c			\
-	nir/nir_opcodes.c				\
-	nir/nir_opcodes.h				\
-	nir/nir_opt_algebraic.c
+	$(NIR_GENERATED_FILES)
 CLEANFILES =						\
 	glcpp/glcpp-parse.h				\
 	glsl_parser.h					\
@@ -239,22 +242,24 @@ dist-hook:
 	$(RM) glcpp/tests/*.out
 	$(RM) glcpp/tests/subtest*/*.out

-nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@

 nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@

 nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@

 nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -69,8 +69,7 @@ NIR_FILES = \
 	nir/nir_vla.h \
 	nir/nir_worklist.c \
 	nir/nir_worklist.h \
-	nir/nir_types.cpp \
-	$(NIR_GENERATED_FILES)
+	nir/nir_types.cpp

 # libglsl

--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2339,6 +2339,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    */
   unsigned used_locations = (max_index >= 32)
      ? ~0 : ~((1 << max_index) - 1);
+   unsigned double_storage_locations = 0;

   assert((target_index == MESA_SHADER_VERTEX)
 	  || (target_index == MESA_SHADER_FRAGMENT));
@@ -2452,34 +2453,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,

      const unsigned slots = var->type->count_attribute_slots();

-      /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
-       *
-       * "A program with more than the value of MAX_VERTEX_ATTRIBS active
-       * attribute variables may fail to link, unless device-dependent
-       * optimizations are able to make the program fit within available
-       * hardware resources. For the purposes of this test, attribute variables
-       * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
-       * and dmat4 may count as consuming twice as many attributes as equivalent
-       * single-precision types. While these types use the same number of
-       * generic attributes as their single-precision equivalents,
-       * implementations are permitted to consume two single-precision vectors
-       * of internal storage for each three- or four-component double-precision
-       * vector."
-       * Until someone has a good reason in Mesa, enforce that now.
-       */
-      if (target_index == MESA_SHADER_VERTEX) {
-	 total_attribs_size += slots;
-	 if (var->type->without_array() == glsl_type::dvec3_type ||
-	     var->type->without_array() == glsl_type::dvec4_type ||
-	     var->type->without_array() == glsl_type::dmat2x3_type ||
-	     var->type->without_array() == glsl_type::dmat2x4_type ||
-	     var->type->without_array() == glsl_type::dmat3_type ||
-	     var->type->without_array() == glsl_type::dmat3x4_type ||
-	     var->type->without_array() == glsl_type::dmat4x3_type ||
-	     var->type->without_array() == glsl_type::dmat4_type)
-	    total_attribs_size += slots;
-      }
-
      /* If the variable is not a built-in and has a location statically
       * assigned in the shader (presumably via a layout qualifier), make sure
       * that it doesn't collide with other assigned locations.  Otherwise,
@@ -2594,6 +2567,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	    }

 	    used_locations |= (use_mask << attr);
+
+            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+             *
+             * "A program with more than the value of MAX_VERTEX_ATTRIBS
+             *  active attribute variables may fail to link, unless
+             *  device-dependent optimizations are able to make the program
+             *  fit within available hardware resources. For the purposes
+             *  of this test, attribute variables of the type dvec3, dvec4,
+             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+             *  count as consuming twice as many attributes as equivalent
+             *  single-precision types. While these types use the same number
+             *  of generic attributes as their single-precision equivalents,
+             *  implementations are permitted to consume two single-precision
+             *  vectors of internal storage for each three- or four-component
+             *  double-precision vector."
+             *
+             * Mark this attribute slot as taking up twice as much space
+             * so we can count it properly against limits.  According to
+             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+             * is optional behavior, but it seems preferable.
+             */
+            const glsl_type *type = var->type->without_array();
+            if (type == glsl_type::dvec3_type ||
+                type == glsl_type::dvec4_type ||
+                type == glsl_type::dmat2x3_type ||
+                type == glsl_type::dmat2x4_type ||
+                type == glsl_type::dmat3_type ||
+                type == glsl_type::dmat3x4_type ||
+                type == glsl_type::dmat4x3_type ||
+                type == glsl_type::dmat4_type) {
+               double_storage_locations |= (use_mask << attr);
+            }
 	 }

 	 continue;
@@ -2605,6 +2610,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
   }

   if (target_index == MESA_SHADER_VERTEX) {
+      unsigned total_attribs_size =
+         _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+         _mesa_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
 	 linker_error(prog,
 		      "attempt to use %d vertex attribute slots only %d available ",
--- a/src/glsl/opt_constant_propagation.cpp
+++ b/src/glsl/opt_constant_propagation.cpp
@@ -40,6 +40,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"

 namespace {

@@ -95,7 +96,8 @@ public:
      killed_all = false;
      mem_ctx = ralloc_context(0);
      this->acp = new(mem_ctx) exec_list;
-      this->kills = new(mem_ctx) exec_list;
+      this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
   }
   ~ir_constant_propagation_visitor()
   {
@@ -123,7 +125,7 @@ public:
    * List of kill_entry: The masks of variables whose values were
    * killed in this block.
    */
-   exec_list *kills;
+   hash_table *kills;

   bool progress;

@@ -263,11 +265,12 @@ ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
    * main() at link time, so they're irrelevant to us.
    */
   exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
   bool orig_killed_all = this->killed_all;

   this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   this->killed_all = false;

   visit_list_elements(this, &ir->body);
@@ -352,11 +355,12 @@ void
 ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
 {
   exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
   bool orig_killed_all = this->killed_all;

   this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   this->killed_all = false;

   /* Populate the initial acp with a constant of the original */
@@ -370,12 +374,14 @@ ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
      orig_acp->make_empty();
   }

-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
   this->kills = orig_kills;
   this->acp = orig_acp;
   this->killed_all = this->killed_all || orig_killed_all;

-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
      kill(k->var, k->write_mask);
   }
 }
@@ -397,7 +403,7 @@ ir_visitor_status
 ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 {
   exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
   bool orig_killed_all = this->killed_all;

   /* FINISHME: For now, the initial acp for loops is totally empty.
@@ -405,7 +411,8 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
    * cloned minus the killed entries after the first run through.
    */
   this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   this->killed_all = false;

   visit_list_elements(this, &ir->body_instructions);
@@ -414,12 +421,14 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
      orig_acp->make_empty();
   }

-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
   this->kills = orig_kills;
   this->acp = orig_acp;
   this->killed_all = this->killed_all || orig_killed_all;

-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
      kill(k->var, k->write_mask);
   }

@@ -448,14 +457,15 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
   /* Add this writemask of the variable to the list of killed
    * variables in this block.
    */
-   foreach_in_list(kill_entry, entry, this->kills) {
-      if (entry->var == var) {
-	 entry->write_mask |= write_mask;
-	 return;
-      }
+   hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
+   if (kill_hash_entry) {
+      kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+      entry->write_mask |= write_mask;
+      return;
   }
   /* Not already in the list.  Make new entry. */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+   _mesa_hash_table_insert(this->kills, var,
+                           new(this->mem_ctx) kill_entry(var, write_mask));
 }

 /**
--- a/src/glsl/opt_constant_variable.cpp
+++ b/src/glsl/opt_constant_variable.cpp
@@ -36,11 +36,11 @@
 #include "ir_visitor.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"

 namespace {

 struct assignment_entry {
-   exec_node link;
   int assignment_count;
   ir_variable *var;
   ir_constant *constval;
@@ -54,31 +54,32 @@ public:
   virtual ir_visitor_status visit_enter(ir_assignment *);
   virtual ir_visitor_status visit_enter(ir_call *);

-   exec_list list;
+   struct hash_table *ht;
 };

 } /* unnamed namespace */

 static struct assignment_entry *
-get_assignment_entry(ir_variable *var, exec_list *list)
+get_assignment_entry(ir_variable *var, struct hash_table *ht)
 {
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
   struct assignment_entry *entry;

-   foreach_list_typed(struct assignment_entry, entry, link, list) {
-      if (entry->var == var)
-	 return entry;
+   if (hte) {
+      entry = (struct assignment_entry *) hte->data;
+   } else {
+      entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
+      entry->var = var;
+      _mesa_hash_table_insert(ht, var, entry);
   }

-   entry = (struct assignment_entry *)calloc(1, sizeof(*entry));
-   entry->var = var;
-   list->push_head(&entry->link);
   return entry;
 }

 ir_visitor_status
 ir_constant_variable_visitor::visit(ir_variable *ir)
 {
-   struct assignment_entry *entry = get_assignment_entry(ir, &this->list);
+   struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
   entry->our_scope = true;
   return visit_continue;
 }
@@ -97,7 +98,7 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir)
   ir_constant *constval;
   struct assignment_entry *entry;

-   entry = get_assignment_entry(ir->lhs->variable_referenced(), &this->list);
+   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
   assert(entry);
   entry->assignment_count++;

@@ -150,7 +151,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
 	 struct assignment_entry *entry;

 	 assert(var);
-	 entry = get_assignment_entry(var, &this->list);
+	 entry = get_assignment_entry(var, this->ht);
 	 entry->assignment_count++;
      }
   }
@@ -161,7 +162,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
      struct assignment_entry *entry;

      assert(var);
-      entry = get_assignment_entry(var, &this->list);
+      entry = get_assignment_entry(var, this->ht);
      entry->assignment_count++;
   }

@@ -177,20 +178,22 @@ do_constant_variable(exec_list *instructions)
   bool progress = false;
   ir_constant_variable_visitor v;

+   v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                  _mesa_key_pointer_equal);
   v.run(instructions);

-   while (!v.list.is_empty()) {
-
-      struct assignment_entry *entry;
-      entry = exec_node_data(struct assignment_entry, v.list.head, link);
+   struct hash_entry *hte;
+   hash_table_foreach(v.ht, hte) {
+      struct assignment_entry *entry = (struct assignment_entry *) hte->data;

      if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
 	 entry->var->constant_value = entry->constval;
 	 progress = true;
      }
-      entry->link.remove();
+      hte->data = NULL;
      free(entry);
   }
+   _mesa_hash_table_destroy(v.ht, NULL);

   return progress;
 }
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -50,19 +50,14 @@ AM_CPPFLAGS =							\

 include Makefile.sources

+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 glapi_gen_mapi_deps := \
 	mapi_abi.py \
 	$(wildcard glapi/gen/*.xml) \
 	$(wildcard glapi/gen/*.py)

-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
-	--mode lib --printer $(2) $(1) > $@
-endef
-
 if HAVE_SHARED_GLAPI
 BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h

@@ -93,7 +88,9 @@ shared_glapi_test_LDADD = \
 endif

 shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,shared-glapi)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer shared-glapi \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

 if HAVE_OPENGL
 noinst_LTLIBRARIES = glapi/libglapi.la
@@ -185,7 +182,9 @@ endif
 endif

 es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es1api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es1api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

 if HAVE_OPENGL_ES2
 TESTS += es2api/ABI-check
@@ -229,6 +228,8 @@ endif
 endif

 es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es2api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es2api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

 include $(top_srcdir)/install-lib-links.mk
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -20,7 +20,7 @@ XORG_INDENT_FLAGS = -linux -bad -bap -blf -bli0 -cbi0 -cdw -nce -cs -i4 -lc80 -p

 MESA_DIR = $(top_builddir)/src/mesa
 MESA_GLAPI_DIR = $(top_builddir)/src/mapi/glapi
-MESA_MAPI_DIR = $(top_builddir)/src/mapi
+MESA_MAPI_DIR = $(top_srcdir)/src/mapi
 MESA_GLX_DIR = $(top_builddir)/src/glx

 MESA_GLAPI_OUTPUTS = \
@@ -209,7 +209,7 @@ COMMON = $(API_XML) \

 COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py

-PYTHON_GEN = $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

 ######################################################################

@@ -241,65 +241,65 @@ $(XORG_GLAPI_DIR)/%.h: $(MESA_GLAPI_DIR)/%.h
 ######################################################################

 $(MESA_GLAPI_DIR)/glapi_mapi_tmp.h: $(MESA_MAPI_DIR)/mapi_abi.py $(COMMON)
-	$(PYTHON_GEN) $< \
+	$(PYTHON_GEN) $(MESA_MAPI_DIR)/mapi_abi.py \
 		--printer glapi --mode lib $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glprocs.h: gl_procs.py $(COMMON)
-	$(PYTHON_GEN) $< -c -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_procs.py -c -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapitemp.h: gl_apitemp.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_apitemp.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapitable.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapi_gentable.c: gl_gentable.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_gentable.py -f $(srcdir)/gl_and_es_API.xml > $@

 ######################################################################

 $(MESA_GLAPI_DIR)/glapi_x86.S: gl_x86_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86_asm.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapi_x86-64.S: gl_x86-64_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86-64_asm.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_SPARC_asm.py -f $(srcdir)/gl_and_es_API.xml > $@

 ######################################################################

 $(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_enums.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_genexec.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@

 $(MESA_DIR)/main/remap_helper.h: remap_helper.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/remap_helper.py -f $(srcdir)/gl_and_es_API.xml > $@

 ######################################################################

 $(MESA_GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m proto \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m proto \
 	  | $(INDENT) $(INDENT_FLAGS) > $@

 $(MESA_GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_h > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_h > $@

 $(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_c > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_c > $@

 $(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_h --only-set \
 	    --header-tag _INDIRECT_SIZE_H_ \
 	  | $(INDENT) $(INDENT_FLAGS) > $@

 $(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_c --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_c --only-set \
 	  | $(INDENT) $(INDENT_FLAGS) > $@

 ######################################################################
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -90,37 +90,24 @@ CLEANFILES = \
 	program/program_parse.tab.h \
 	main/git_sha1.h.tmp

-GET_HASH_GEN = main/get_hash_generator.py
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py 	\
-		 $(GET_HASH_GEN)
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN)		\
-		-f $< > $@.tmp;						\
-	mv $@.tmp $@;
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
+                 main/get_hash_generator.py
+	$(PYTHON_GEN) $(srcdir)/main/get_hash_generator.py \
+		-f $(srcdir)/../mapi/glapi/gen/gl_and_es_API.xml > $@

-main/format_info.h: main/formats.csv                                    \
+main/format_info.h: main/formats.csv \
                    main/format_parser.py main/format_info.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/main/format_info.py        \
-                   $< > $@.tmp;                                         \
-	mv $@.tmp $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_info.py $(srcdir)/main/formats.csv > $@

-main/format_pack.c: main/format_pack.py main/formats.csv		\
+main/format_pack.c: main/format_pack.py main/formats.csv \
                    main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_pack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_pack.py $(srcdir)/main/formats.csv > $@

 main/format_unpack.c: main/format_unpack.py main/formats.csv	\
                      main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_unpack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_unpack.py $(srcdir)/main/formats.csv > $@

 main/formats.c: main/format_info.h

@@ -201,13 +188,17 @@ libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc

+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
+
 program/lex.yy.c: program/program_lexer.l
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/program/program_lexer.l

 program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "_mesa_program_" --defines=$(builddir)/program/program_parse.tab.h $(srcdir)/program/program_parse.y

 if GEN_ASM_OFFSETS
 matypes.h: $(gen_matypes_SOURCES)
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -163,7 +163,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
   const GLuint maxLevel = texObj->MaxLevel;
   const GLint maxLevelSave = texObj->MaxLevel;
   const GLboolean genMipmapSave = texObj->GenerateMipmap;
-   const GLuint currentTexUnitSave = ctx->Texture.CurrentUnit;
   const GLboolean use_glsl_version = ctx->Extensions.ARB_vertex_shader &&
                                      ctx->Extensions.ARB_fragment_shader;
   GLenum faceTarget;
@@ -202,8 +201,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
   samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
      ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;

-   if (currentTexUnitSave != 0)
-      _mesa_BindTexture(target, texObj->Name);
+   /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+    * still set to 0, so we don't know when we can skip binding the texture.
+    * Assume that _mesa_BindTexture will be fast if we're rebinding the same
+    * texture.
+    */
+   _mesa_BindTexture(target, texObj->Name);

   if (!mipmap->Sampler) {
      _mesa_GenSamplers(1, &mipmap->Sampler);
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
@@ -67,7 +67,7 @@ CLEANFILES = \
 	$(MOS)

 # Default target options.h
-options.h: LOCALEDIR := .
+LOCALEDIR := .
 options.h: t_options.h $(MOS)
 	$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h

--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -215,6 +215,10 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
   struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
   struct intel_texture_image *intel_image = intel_texture_image(dst_image);

+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
   /* Sync up the state of window system buffers.  We need to do this before
    * we go looking at the src renderbuffer's miptree.
    */
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -323,6 +323,15 @@ brw_initialize_context_constants(struct brw_context *brw)

   ctx->Const.StripTextureBorder = true;

+   ctx->Const.MaxUniformBlockSize = 65536;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_program_constants *prog = &ctx->Const.Program[i];
+      prog->MaxUniformBlocks = 12;
+      prog->MaxCombinedUniformComponents =
+         prog->MaxUniformComponents +
+         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+   }
+
   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -793,21 +793,6 @@ brw_emit_vertices(struct brw_context *brw)
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

-   if (brw->gen >= 6 && gen6_edgeflag_input) {
-      uint32_t format =
-         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
-
-      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
-                GEN6_VE0_VALID |
-                GEN6_VE0_EDGE_FLAG_ENABLE |
-                (format << BRW_VE0_FORMAT_SHIFT) |
-                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
-
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
@@ -848,6 +833,21 @@ brw_emit_vertices(struct brw_context *brw)
      OUT_BATCH(dw1);
   }

+   if (brw->gen >= 6 && gen6_edgeflag_input) {
+      uint32_t format =
+         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+
+      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
+                GEN6_VE0_VALID |
+                GEN6_VE0_EDGE_FLAG_ENABLE |
+                (format << BRW_VE0_FORMAT_SHIFT) |
+                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
+
   ADVANCE_BATCH();
 }

--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3211,7 +3211,8 @@ fs_visitor::lower_integer_multiplication()
             * schedule multi-component multiplications much better.
             */

-            if (inst->conditional_mod && inst->dst.is_null()) {
+            fs_reg orig_dst = inst->dst;
+            if (orig_dst.is_null() || orig_dst.file == MRF) {
               inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                                  inst->dst.type);
            }
@@ -3277,10 +3278,9 @@ fs_visitor::lower_integer_multiplication()

            ibld.ADD(dst, low, high);

-            if (inst->conditional_mod) {
-               fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+            if (inst->conditional_mod || orig_dst.file == MRF) {
               set_condmod(inst->conditional_mod,
-                           ibld.MOV(null, inst->dst));
+                           ibld.MOV(orig_dst, inst->dst));
            }
         }

@@ -4801,12 +4801,12 @@ fs_visitor::optimize()
    */
   bld = fs_builder(this, 64);

-   split_virtual_grfs();
-
   move_uniform_array_access_to_pull_constants();
   assign_constant_locations();
   demote_pull_constants();

+   split_virtual_grfs();
+
 #define OPT(pass, args...) ({                                           \
      pass_num++;                                                       \
      bool this_progress = pass(args);                                  \
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,7 @@ static bool
 can_change_source_types(fs_inst *inst)
 {
   return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
          (inst->opcode == BRW_OPCODE_MOV ||
           (inst->opcode == BRW_OPCODE_SEL &&
            inst->predicate != BRW_PREDICATE_NONE &&
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -43,6 +43,7 @@
 #include "glsl/ir_visitor.h"
 #include "glsl/ir_rvalue_visitor.h"
 #include "glsl/glsl_types.h"
+#include "util/hash_table.h"

 static bool debug = false;

@@ -72,7 +73,8 @@ public:
   ir_vector_reference_visitor(void)
   {
      this->mem_ctx = ralloc_context(NULL);
-      this->variable_list.make_empty();
+      this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   }

   ~ir_vector_reference_visitor(void)
@@ -89,7 +91,7 @@ public:
   variable_entry *get_variable_entry(ir_variable *var);

   /* List of variable_entry */
-   exec_list variable_list;
+   struct hash_table *ht;

   void *mem_ctx;
 };
@@ -119,13 +121,12 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
      break;
   }

-   foreach_in_list(variable_entry, entry, &variable_list) {
-      if (entry->var == var)
-	 return entry;
-   }
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   if (hte)
+      return (struct variable_entry *) hte->data;

   variable_entry *entry = new(mem_ctx) variable_entry(var);
-   this->variable_list.push_tail(entry);
+   _mesa_hash_table_insert(ht, var, entry);
   return entry;
 }

@@ -195,9 +196,9 @@ ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)

 class ir_vector_splitting_visitor : public ir_rvalue_visitor {
 public:
-   ir_vector_splitting_visitor(exec_list *vars)
+   ir_vector_splitting_visitor(struct hash_table *vars)
   {
-      this->variable_list = vars;
+      this->ht = vars;
   }

   virtual ir_visitor_status visit_leave(ir_assignment *);
@@ -205,7 +206,7 @@ public:
   void handle_rvalue(ir_rvalue **rvalue);
   variable_entry *get_splitting_entry(ir_variable *var);

-   exec_list *variable_list;
+   struct hash_table *ht;
 };

 variable_entry *
@@ -216,13 +217,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
   if (!var->type->is_vector())
      return NULL;

-   foreach_in_list(variable_entry, entry, variable_list) {
-      if (entry->var == var) {
-	 return entry;
-      }
-   }
-
-   return NULL;
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   return hte ? (struct variable_entry *) hte->data : NULL;
 }

 void
@@ -329,12 +325,15 @@ ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
 bool
 brw_do_vector_splitting(exec_list *instructions)
 {
+   struct hash_entry *hte;
+
   ir_vector_reference_visitor refs;

   visit_list_elements(&refs, instructions);

   /* Trim out variables we can't split. */
-   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
      if (debug) {
 	 fprintf(stderr, "vector %s@%p: whole_access %d\n",
                 entry->var->name, (void *) entry->var,
@@ -342,11 +341,11 @@ brw_do_vector_splitting(exec_list *instructions)
      }

      if (entry->whole_vector_access) {
-	 entry->remove();
+         _mesa_hash_table_remove(refs.ht, hte);
      }
   }

-   if (refs.variable_list.is_empty())
+   if (refs.ht->entries == 0)
      return false;

   void *mem_ctx = ralloc_context(NULL);
@@ -354,7 +353,8 @@ brw_do_vector_splitting(exec_list *instructions)
   /* Replace the decls of the vectors to be split with their split
    * components.
    */
-   foreach_in_list(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
      const struct glsl_type *type;
      type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);

@@ -378,7 +378,7 @@ brw_do_vector_splitting(exec_list *instructions)
      entry->var->remove();
   }

-   ir_vector_splitting_visitor split(&refs.variable_list);
+   ir_vector_splitting_visitor split(refs.ht);
   visit_list_elements(&split, instructions);

   ralloc_free(mem_ctx);
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -878,7 +878,8 @@ brw_upload_invariant_state(struct brw_context *brw)
 {
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;

-   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw->last_pipeline = BRW_RENDER_PIPELINE;

   if (brw->gen < 6) {
      /* Disable depth offset clamping. */
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw)
 {
   struct gl_context *ctx = &brw->ctx;
   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+   bool uses_edge_flag;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

+   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+                     ctx->Polygon.BackMode != GL_FILL);
+
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

-      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
-                "Using VID/IID with edgeflags, need to reorder the "
-                "vertex attributes");
+      /* The element for the edge flags must always be last, so we have to
+       * insert the SGVS before it in that case.
+       */
+      if (uses_edge_flag) {
+         assert(vue > 0);
+         vue--;
+      }
+
      WARN_ONCE(vue >= 33,
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");
@@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw)
      ADVANCE_BATCH();
   }

-   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   /* Normally we don't need an element for the SGVS attribute because the
+    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
+    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
+    * vertex ID is used then it needs an element for the base vertex buffer.
+    * Additionally if there is an edge flag element then the SGVS can't be
+    * inserted past that so we need a dummy element to ensure that the edge
+    * flag is the last one.
+    */
+   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
+                              (brw->vs.prog_data->uses_instanceid &&
+                               uses_edge_flag));
+   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
@@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw)
                (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
   }

+   if (needs_sgvs_element) {
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(GEN6_VE0_VALID |
+                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      } else {
+         OUT_BATCH(GEN6_VE0_VALID);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      }
+   }
+
   if (gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@@ -206,25 +244,26 @@ gen8_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }
-
-   if (brw->vs.prog_data->uses_vertexid) {
-      OUT_BATCH(GEN6_VE0_VALID |
-                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
   ADVANCE_BATCH();

-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+      unsigned element_index;
+
+      /* The edge flag element is reordered to be the last one in the code
+       * above so we need to compensate for that in the element indices used
+       * below.
+       */
+      if (input == gen6_edgeflag_input)
+         element_index = nr_elements - 1;
+      else
+         element_index = j++;

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
+      OUT_BATCH(element_index |
+                (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
      OUT_BATCH(buffer->step_rate);
      ADVANCE_BATCH();
   }
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -796,47 +796,43 @@ intel_emit_linear_blit(struct brw_context *brw,
   int16_t src_x, dst_x;
   bool ok;

-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-			  src_x, 0, /* src x/y */
-			  dst_x, 0, /* dst x/y */
-			  pitch, height, /* w, h */
-			  GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and
+       * we want width to match pitch. Max width is (1 << 15 - 1),
+       * rounding that down to the nearest DWORD is 1 << 15 - 4
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;

-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);

-   if (size != 0) {
      ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
                             INTEL_MIPTREE_TRMODE_NONE,
-			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
                             INTEL_MIPTREE_TRMODE_NONE,
-			     src_x, 0, /* src x/y */
-			     dst_x, 0, /* dst x/y */
-			     size, 1, /* w, h */
-			     GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }

 /**
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -109,6 +109,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
       pack->Invert)
      return false;

+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
   /* This renderbuffer can come from a texture.  In this case, we impose
    * some of the same restrictions we have for textures and adjust for
    * miplevels.
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -55,6 +55,10 @@ intel_copy_texsubimage(struct brw_context *brw,
   const GLenum internalFormat = intelImage->base.Base.InternalFormat;
   bool ret;

+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
   intel_prepare_render(brw);

   /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -118,6 +118,10 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
       packing->Invert)
      return false;

+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
                         INTEL_UPLOAD))
      return false;
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -1013,6 +1013,10 @@ _mesa_format_to_type_and_comps(mesa_format format,
   case MESA_FORMAT_R8G8B8X8_UNORM:
   case MESA_FORMAT_B8G8R8X8_UNORM:
   case MESA_FORMAT_X8R8G8B8_UNORM:
+   case MESA_FORMAT_A8B8G8R8_UINT:
+   case MESA_FORMAT_R8G8B8A8_UINT:
+   case MESA_FORMAT_B8G8R8A8_UINT:
+   case MESA_FORMAT_A8R8G8B8_UINT:
      *datatype = GL_UNSIGNED_BYTE;
      *comps = 4;
      return;
@@ -1023,6 +1027,8 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;
   case MESA_FORMAT_B5G6R5_UNORM:
   case MESA_FORMAT_R5G6B5_UNORM:
+   case MESA_FORMAT_B5G6R5_UINT:
+   case MESA_FORMAT_R5G6B5_UINT:
      *datatype = GL_UNSIGNED_SHORT_5_6_5;
      *comps = 3;
      return;
@@ -1030,6 +1036,8 @@ _mesa_format_to_type_and_comps(mesa_format format,
   case MESA_FORMAT_B4G4R4A4_UNORM:
   case MESA_FORMAT_A4R4G4B4_UNORM:
   case MESA_FORMAT_B4G4R4X4_UNORM:
+   case MESA_FORMAT_B4G4R4A4_UINT:
+   case MESA_FORMAT_A4R4G4B4_UINT:
      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
      *comps = 4;
      return;
@@ -1037,6 +1045,8 @@ _mesa_format_to_type_and_comps(mesa_format format,
   case MESA_FORMAT_B5G5R5A1_UNORM:
   case MESA_FORMAT_A1R5G5B5_UNORM:
   case MESA_FORMAT_B5G5R5X1_UNORM:
+   case MESA_FORMAT_B5G5R5A1_UINT:
+   case MESA_FORMAT_A1R5G5B5_UINT:
      *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      *comps = 4;
      return;
@@ -1047,6 +1057,7 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;

   case MESA_FORMAT_A1B5G5R5_UNORM:
+   case MESA_FORMAT_A1B5G5R5_UINT:
      *datatype = GL_UNSIGNED_SHORT_5_5_5_1;
      *comps = 4;
      return;
@@ -1081,19 +1092,23 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;

   case MESA_FORMAT_R3G3B2_UNORM:
+   case MESA_FORMAT_R3G3B2_UINT:
      *datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
      *comps = 3;
      return;
   case MESA_FORMAT_A4B4G4R4_UNORM:
+   case MESA_FORMAT_A4B4G4R4_UINT:
      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
      *comps = 4;
      return;

   case MESA_FORMAT_R4G4B4A4_UNORM:
+   case MESA_FORMAT_R4G4B4A4_UINT:
      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
      *comps = 4;
      return;
   case MESA_FORMAT_R5G5B5A1_UNORM:
+   case MESA_FORMAT_R5G5B5A1_UINT:
      *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      *comps = 4;
      return;
@@ -1109,6 +1124,7 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;

   case MESA_FORMAT_B2G3R3_UNORM:
+   case MESA_FORMAT_B2G3R3_UINT:
      *datatype = GL_UNSIGNED_BYTE_3_3_2;
      *comps = 3;
      return;
@@ -2138,6 +2154,96 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
              type == GL_UNSIGNED_INT_2_10_10_10_REV &&
              !swapBytes);

+   case MESA_FORMAT_B5G6R5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5;
+
+   case MESA_FORMAT_R5G6B5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5_REV;
+
+   case MESA_FORMAT_B2G3R3_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_3_3_2;
+
+   case MESA_FORMAT_R3G3B2_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+   case MESA_FORMAT_A4B4G4R4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_R4G4B4A4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B4G4R4A4_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A4R4G4B4_UINT:
+      return GL_FALSE;
+
+   case MESA_FORMAT_A1B5G5R5_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_B5G5R5A1_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A1R5G5B5_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_R5G5B5A1_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
+
+   case MESA_FORMAT_A8B8G8R8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_A8R8G8B8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R8G8B8A8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B8G8R8A8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
   case MESA_FORMAT_R9G9B9E5_FLOAT:
      return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
         !swapBytes;
--- a/Show More
+++ b/Show More