docs: add release notes for 11.0.1

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 11.0.1
2015-09-26 13:32:07 +01:00 · 2015-09-26 13:22:34 +01:00 · 2015-09-23 21:10:42 +01:00 · 2015-09-23 21:10:11 +01:00 · 2015-09-23 21:09:41 +01:00 · 2015-09-23 21:09:10 +01:00
81 changed files with 1255 additions and 420 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -42,6 +42,7 @@ LOCAL_CFLAGS += \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

 LOCAL_CFLAGS += \
+	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
 	-DHAVE___BUILTIN_FFSLL \
@@ -70,7 +71,7 @@ endif

 ifeq ($(MESA_ENABLE_LLVM),true)
 LOCAL_CFLAGS += \
-	-DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
+	-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
 	-D__STDC_CONSTANT_MACROS \
 	-D__STDC_FORMAT_MACROS \
 	-D__STDC_LIMIT_MACROS
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.0-rc3
+11.0.1
--- a/configure.ac
+++ b/configure.ac
@@ -1150,6 +1150,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
      [DEFINES="${DEFINES} -DGLX_USE_TLS"])

+dnl Read-only text section on x86 hardened platforms
+AC_ARG_ENABLE([glx-read-only-text],
+    [AS_HELP_STRING([--enable-glx-read-only-text],
+        [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+    [enable_glx_read_only_text="$enableval"],
+    [enable_glx_read_only_text=no])
+if test "x$enable_glx_read_only_text" = xyes; then
+    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+fi
+
 dnl
 dnl More DRI setup
 dnl
--- a/docs/relnotes/11.0.0.html
+++ b/docs/relnotes/11.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 11.0.0 Release Notes / TBD</h1>
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>

 <p>
 Mesa 11.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3  mesa-11.0.0.tar.gz
+e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32  mesa-11.0.0.tar.xz
 </pre>


@@ -83,13 +84,175 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
 </ul>

+
 <h2>Bug fixes</h2>

-TBD.
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error ndefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean&#64;vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fais</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings &gt;max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50&#64;execution&#64;geometry&#64;max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error &quot;Unsupported or undefined LONG_BIT&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+<li>Removed the EGL loader from the Linux SCons build.</li>

 </div>
 </body>
--- a/docs/relnotes/11.0.1.html
+++ b/docs/relnotes/11.0.1.html
@@ -0,0 +1,133 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
+
+<p>
+Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
+</p>
+<p>
+Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Antia Puentes (2):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+  <li>i965/vec4_nir: Load constants as integers</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.0</li>
+  <li>Update version to 11.0.1</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+  <li>freedreno/a3xx: fix blending of L8 format</li>
+  <li>nv50,nvc0: detect underlying resource changes and update tic</li>
+  <li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
+  <li>radeonsi: load fmask ptr relative to the resources array</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir: Fix a bunch of ralloc parenting errors</li>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Tapani Pälli (2):</p>
+<ul>
+  <li>mesa: fix errors when reading depth with glReadPixels</li>
+  <li>i965: fix textureGrad for cubemaps</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \

 endif

-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+indices/u_indices_gen.c: indices/u_indices_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@

-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(AM_V_at)$(MKDIR_P) util
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@

+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@

 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
 	case PIPE_FORMAT_R16G16_FLOAT:
 	case PIPE_FORMAT_R11G11B10_FLOAT:
 		return RB_R16G16B16A16_FLOAT;
+	case PIPE_FORMAT_L8_UNORM:
+		return RB_R8G8B8A8_UNORM;
 	default:
 		return fd3_pipe2color(format);
 	}
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -884,7 +884,7 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
   defId(i->def(0), 2);
   srcAddr8(i->src(0), 16);

-   if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
+   if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
      code[0] |= 1 << 8;
   } else {
      if (i->op == OP_PINTERP) {
@@ -896,10 +896,11 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
   }

   if (i->encSize == 8) {
-      code[1] =
-         (code[0] & (3 << 24)) >> (24 - 16) |
-         (code[0] & (1 <<  8)) << (18 -  8);
-      code[0] &= ~0x03000100;
+      if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
+         code[1] = 4 << 16;
+      else
+         code[1] = (code[0] & (3 << 24)) >> (24 - 16);
+      code[0] &= ~0x03000000;
      code[0] |= 1;
      emitFlagsRd(i);
   }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -202,7 +202,10 @@ NV50LegalizePostRA::visit(Function *fn)
   Program *prog = fn->getProgram();

   r63 = new_LValue(fn, FILE_GPR);
-   r63->reg.data.id = 63;
+   if (prog->maxGPR < 63)
+      r63->reg.data.id = 63;
+   else
+      r63->reg.data.id = 127;

   // this is actually per-program, but we can do it all on visiting main()
   std::list<Instruction *> *outWrites =
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2602,6 +2602,10 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
             !isFloatType(i->dType))
            break;

+         if (i->getDef(0)->reg.data.id >= 64 ||
+             i->getSrc(0)->reg.data.id >= 64)
+            break;
+
         def = i->getSrc(1)->getInsn();
         if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
            vtmp = i->getSrc(1);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -411,7 +411,7 @@ int ImmediateValue::print(char *buf, size_t size, DataType ty) const
   case TYPE_U64:
   case TYPE_S64:
   default:
-      PRINT("0x%016"PRIx64, reg.data.u64);
+      PRINT("0x%016" PRIx64, reg.data.u64);
      break;
   }
   return pos;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,6 +25,7 @@

 #include <stack>
 #include <limits>
+#include <tr1/unordered_map>

 namespace nv50_ir {

@@ -222,6 +223,7 @@ private:
   private:
      virtual bool visit(BasicBlock *);
      inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
+      inline void splitEdges(BasicBlock *b);
   };

   class ArgumentMovesPass : public Pass {
@@ -345,28 +347,55 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
   return (n == 2);
 }

-// For each operand of each PHI in b, generate a new value by inserting a MOV
-// at the end of the block it is coming from and replace the operand with its
-// result. This eliminates liveness conflicts and enables us to let values be
-// copied to the right register if such a conflict exists nonetheless.
+struct PhiMapHash {
+   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
+      return std::tr1::hash<Instruction*>()(val.first) * 31 +
+         std::tr1::hash<BasicBlock*>()(val.second);
+   }
+};
+
+typedef std::tr1::unordered_map<
+   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
+
+// Critical edges need to be split up so that work can be inserted along
+// specific edge transitions. Unfortunately manipulating incident edges into a
+// BB invalidates all the PHI nodes since their sources are implicitly ordered
+// by incident edge order.
 //
-// These MOVs are also crucial in making sure the live intervals of phi srces
-// are extended until the end of the loop, since they are not included in the
-// live-in sets.
-bool
-RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+// TODO: Make it so that that is not the case, and PHI nodes store pointers to
+// the original BBs.
+void
+RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
 {
-   Instruction *phi, *mov;
   BasicBlock *pb, *pn;
-
+   Instruction *phi;
+   Graph::EdgeIterator ei;
   std::stack<BasicBlock *> stack;
+   int j = 0;

-   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
      pb = BasicBlock::get(ei.getNode());
      assert(pb);
      if (needNewElseBlock(bb, pb))
         stack.push(pb);
   }
+
+   // No critical edges were found, no need to perform any work.
+   if (stack.empty())
+      return;
+
+   // We're about to, potentially, reorder the inbound edges. This means that
+   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
+   // nodes after the graph has been modified.
+   PhiMap phis;
+
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
+         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
+   }
+
   while (!stack.empty()) {
      pb = stack.top();
      pn = new BasicBlock(func);
@@ -379,12 +408,47 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
      assert(pb->getExit()->op != OP_CALL);
      if (pb->getExit()->asFlow()->target.bb == bb)
         pb->getExit()->asFlow()->target.bb = pn;
+
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
+         phis.erase(it);
+      }
   }

+   // Now go through and fix up all of the phi node sources.
+   j = 0;
+   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
+      pb = BasicBlock::get(ei.getNode());
+      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
+         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
+         assert(it != phis.end());
+
+         phi->setSrc(j, it->second);
+      }
+   }
+}
+
+// For each operand of each PHI in b, generate a new value by inserting a MOV
+// at the end of the block it is coming from and replace the operand with its
+// result. This eliminates liveness conflicts and enables us to let values be
+// copied to the right register if such a conflict exists nonetheless.
+//
+// These MOVs are also crucial in making sure the live intervals of phi srces
+// are extended until the end of the loop, since they are not included in the
+// live-in sets.
+bool
+RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
+{
+   Instruction *phi, *mov;
+
+   splitEdges(bb);
+
   // insert MOVs (phi->src(j) should stem from j-th in-BB)
   int j = 0;
   for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
-      pb = BasicBlock::get(ei.getNode());
+      BasicBlock *pb = BasicBlock::get(ei.getNode());
      if (!pb->isTerminated())
         pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));

--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
      nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
                    tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
   else
-   if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
-      nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
+   if (nv->push_cb && can_cb)
+      nv->push_cb(nv, buf,
                  base, size / 4, (const uint32_t *)data);
   else
      nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
--- a/src/gallium/drivers/nouveau/nouveau_buffer.h
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.h
@@ -41,6 +41,8 @@ struct nv04_resource {
   uint8_t status;
   uint8_t domain;

+   uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
+
   struct nouveau_fence *fence;
   struct nouveau_fence *fence_wr;

--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -6,6 +6,8 @@

 #define NOUVEAU_MAX_SCRATCH_BUFS 4

+struct nv04_resource;
+
 struct nouveau_context {
   struct pipe_context pipe;
   struct nouveau_screen *screen;
@@ -23,8 +25,7 @@ struct nouveau_context {
                     unsigned, const void *);
   /* base, size refer to the whole constant buffer */
   void (*push_cb)(struct nouveau_context *,
-                   struct nouveau_bo *, unsigned domain,
-                   unsigned base, unsigned size,
+                   struct nv04_resource *,
                   unsigned offset, unsigned words, const uint32_t *);

   /* @return: @ref reduced by nr of references found in context */
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -149,14 +149,50 @@ static void
 nv30_resource_resolve(struct nv30_context *nv30,
                      const struct pipe_blit_info *info)
 {
+   struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
   struct nv30_rect src, dst;
+   unsigned x, x0, x1, y, y1, w, h;

   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
      info->src.box.y, info->src.box.width, info->src.box.height, &src);
   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);

-   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+   x0 = src.x0;
+   x1 = src.x1;
+   y1 = src.y1;
+
+   /* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
+   for (y = src.y0; y < y1; y += h) {
+      h = y1 - y;
+      if (h > 1024)
+         h = 1024;
+
+      src.y0 = 0;
+      src.y1 = h;
+      src.h = h;
+
+      dst.y1 = dst.y0 + (h >> src_mt->ms_y);
+      dst.h = h >> src_mt->ms_y;
+
+      for (x = x0; x < x1; x += w) {
+         w = x1 - x;
+         if (w > 1024)
+            w = 1024;
+
+         src.offset = y * src.pitch + x * src.cpp;
+         src.x0 = 0;
+         src.x1 = w;
+         src.w = w;
+
+         dst.offset = (y >> src_mt->ms_y) * dst.pitch +
+                      (x >> src_mt->ms_x) * dst.cpp;
+         dst.x1 = dst.x0 + (w >> src_mt->ms_x);
+         dst.w = w >> src_mt->ms_x;
+
+         nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
+      }
+   }
 }

 void
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -319,8 +319,9 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
                                unsigned sample_count,
                                unsigned bindings)
 {
-   if (sample_count > 4)
+   if (sample_count > nv30_screen(pscreen)->max_sample_count)
      return false;
+
   if (!(0x00000017 & (1 << sample_count)))
      return false;

@@ -450,6 +451,23 @@ nv30_screen_create(struct nouveau_device *dev)
      return NULL;
   }

+   /*
+    * Some modern apps try to use msaa without keeping in mind the
+    * restrictions on videomem of older cards. Resulting in dmesg saying:
+    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+    *
+    * Because we are running out of video memory, after which the program
+    * using the msaa visual freezes, and eventually the entire system freezes.
+    *
+    * To work around this we do not allow msaa visauls by default and allow
+    * the user to override this via NV30_MAX_MSAA.
+    */
+   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (screen->max_sample_count > 4)
+      screen->max_sample_count = 4;
+
   pscreen = &screen->base.base;
   pscreen->destroy = nv30_screen_destroy;
   pscreen->get_param = nv30_screen_get_param;
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
@@ -38,6 +38,8 @@ struct nv30_screen {
   /*XXX: nvfx state */
   struct nouveau_heap *vp_exec_heap;
   struct nouveau_heap *vp_data_heap;
+
+   unsigned max_sample_count;
 };

 static inline struct nv30_screen *
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -371,7 +371,7 @@ nv30_transfer_rect_blit(XFER_ARGS)
 static bool
 nv30_transfer_sifm(XFER_ARGS)
 {
-   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
+   if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
      return false;

   if (src->d > 1 || dst->d > 1)
@@ -381,7 +381,7 @@ nv30_transfer_sifm(XFER_ARGS)
      return false;

   if (!dst->pitch) {
-      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
+      if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
         return false;
   } else {
      if (dst->domain != NOUVEAU_BO_VRAM)
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -203,8 +203,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
   F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
   C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
   F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
   C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
   F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
   F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),

   C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -100,7 +100,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 330;
   case PIPE_CAP_MAX_RENDER_TARGETS:
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -221,6 +221,26 @@ nv50_create_texture_view(struct pipe_context *pipe,
   return &view->pipe;
 }

+static void
+nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
+                struct nv04_resource *res)
+{
+   uint64_t address = res->address;
+   if (res->base.target != PIPE_BUFFER)
+      return;
+   address += tic->pipe.u.buf.first_element *
+      util_format_get_blocksize(tic->pipe.format);
+   if (tic->tic[1] == (uint32_t)address &&
+       (tic->tic[2] & 0xff) == address >> 32)
+      return;
+
+   nv50_screen_tic_unlock(nv50->screen, tic);
+   tic->id = -1;
+   tic->tic[1] = address;
+   tic->tic[2] &= 0xffffff00;
+   tic->tic[2] |= address >> 32;
+}
+
 static bool
 nv50_validate_tic(struct nv50_context *nv50, int s)
 {
@@ -240,6 +260,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
         continue;
      }
      res = &nv50_miptree(tic->pipe.texture)->base;
+      nv50_update_tic(nv50, tic, res);

      if (tic->id < 0) {
         tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -768,6 +768,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 {
   struct nv50_context *nv50 = nv50_context(pipe);
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   bool tex_dirty = false;
   int i, s;

   /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
@@ -797,6 +798,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)

   push->kick_notify = nv50_draw_vbo_kick_notify;

+   /* TODO: Instead of iterating over all the buffer resources looking for
+    * coherent buffers, keep track of a context-wide count.
+    */
   for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
      uint32_t valid = nv50->constbuf_valid[s];

@@ -824,6 +828,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
      nv50->cb_dirty = false;
   }

+   for (s = 0; s < 3 && !tex_dirty; ++s) {
+      for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
+         if (!nv50->textures[s][i] ||
+             nv50->textures[s][i]->texture->target != PIPE_BUFFER)
+            continue;
+         if (nv50->textures[s][i]->texture->flags &
+             PIPE_RESOURCE_FLAG_MAP_COHERENT)
+            tex_dirty = true;
+      }
+   }
+   if (tex_dirty) {
+      BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
+      PUSH_DATA (push, 0x20);
+   }
+
   if (nv50->vbo_fifo) {
      nv50_push_vbo(nv50, info);
      push->kick_notify = nv50_default_kick_notify;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
                      struct nouveau_bo *dst, unsigned offset, unsigned domain,
                      unsigned size, const void *data);
 void
-nvc0_cb_push(struct nouveau_context *,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
-             unsigned offset, unsigned words, const uint32_t *data);
+nvc0_cb_bo_push(struct nouveau_context *,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data);

 /* nvc0_vbo.c */
 void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -449,7 +449,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
-         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+         fp->hdr[18] |= 0xf << info->out[i].slot[0];
   }

   fp->fp.early_z = info->prop.fp.earlyFragTests;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -87,7 +87,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
      return 31;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 410;
   case PIPE_CAP_MAX_RENDER_TARGETS:
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
   }
   nvc0->constbuf_dirty[s] |= 1 << i;

+   if (nvc0->constbuf[s][i].u.buf)
+      nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
   pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);

   nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (0 << 4) | 1);
            }
-            nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
+            nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
                         base, nvc0->state.uniform_buffer_bound[s],
                         0, (size + 3) / 4,
                         nvc0->constbuf[s][0].u.data);
@@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
               BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);

               nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
+               res->cb_bindings[s] |= 1 << i;
            } else {
               BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
               PUSH_DATA (push, (i << 4) | 0);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -226,6 +226,26 @@ nvc0_create_texture_view(struct pipe_context *pipe,
   return &view->pipe;
 }

+static void
+nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
+                struct nv04_resource *res)
+{
+   uint64_t address = res->address;
+   if (res->base.target != PIPE_BUFFER)
+      return;
+   address += tic->pipe.u.buf.first_element *
+      util_format_get_blocksize(tic->pipe.format);
+   if (tic->tic[1] == (uint32_t)address &&
+       (tic->tic[2] & 0xff) == address >> 32)
+      return;
+
+   nvc0_screen_tic_unlock(nvc0->screen, tic);
+   tic->id = -1;
+   tic->tic[1] = address;
+   tic->tic[2] &= 0xffffff00;
+   tic->tic[2] |= address >> 32;
+}
+
 static bool
 nvc0_validate_tic(struct nvc0_context *nvc0, int s)
 {
@@ -247,6 +267,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
+      nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
@@ -313,6 +334,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
+      nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -506,11 +506,48 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
 }

 /* This happens rather often with DTD9/st. */
-void
+static void
 nvc0_cb_push(struct nouveau_context *nv,
-             struct nouveau_bo *bo, unsigned domain,
-             unsigned base, unsigned size,
+             struct nv04_resource *res,
             unsigned offset, unsigned words, const uint32_t *data)
+{
+   struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+   struct nvc0_constbuf *cb = NULL;
+   int s;
+
+   /* Go through all the constbuf binding points of this buffer and try to
+    * find one which contains the region to be updated.
+    */
+   for (s = 0; s < 6 && !cb; s++) {
+      uint16_t bindings = res->cb_bindings[s];
+      while (bindings) {
+         int i = ffs(bindings) - 1;
+         uint32_t cb_offset = nvc0->constbuf[s][i].offset;
+
+         bindings &= ~(1 << i);
+         if (cb_offset <= offset &&
+             cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) {
+            cb = &nvc0->constbuf[s][i];
+            break;
+         }
+      }
+   }
+
+   if (cb) {
+      nvc0_cb_bo_push(nv, res->bo, res->domain,
+                      res->offset + cb->offset, cb->size,
+                      offset - cb->offset, words, data);
+   } else {
+      nv->push_data(nv, res->bo, res->offset + offset, res->domain,
+                    words * 4, data);
+   }
+}
+
+void
+nvc0_cb_bo_push(struct nouveau_context *nv,
+                struct nouveau_bo *bo, unsigned domain,
+                unsigned base, unsigned size,
+                unsigned offset, unsigned words, const uint32_t *data)
 {
   struct nouveau_pushbuf *push = nv->pushbuf;

@@ -520,6 +557,9 @@ nvc0_cb_push(struct nouveau_context *nv,
   assert(!(offset & 3));
   size = align(size, 0x100);

+   assert(offset < size);
+   assert(offset + words * 4 <= size);
+
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, size);
   PUSH_DATAh(push, bo->offset + base);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -899,6 +899,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)

   push->kick_notify = nvc0_draw_vbo_kick_notify;

+   /* TODO: Instead of iterating over all the buffer resources looking for
+    * coherent buffers, keep track of a context-wide count.
+    */
   for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
      uint32_t valid = nvc0->constbuf_valid[s];

@@ -924,6 +927,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
      nvc0->cb_dirty = false;
   }

+   for (s = 0; s < 5; ++s) {
+      for (int i = 0; i < nvc0->num_textures[s]; ++i) {
+         struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+         struct pipe_resource *res;
+         if (!tic)
+            continue;
+         res = nvc0->textures[s][i]->texture;
+         if (res->target != PIPE_BUFFER ||
+             !(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+            continue;
+
+         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+         PUSH_DATA (push, (tic->id << 4) | 1);
+         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+      }
+   }
+
   if (nvc0->state.vbo_mode) {
      nvc0_push_vbo(nvc0, info);
      push->kick_notify = nvc0_default_kick_notify;
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1853,7 +1853,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (resource_offset + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_030008_STRIDE(vb->stride) |
@@ -1923,7 +1923,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
 		radeon_emit(cs, va); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -141,7 +141,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 	bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
 	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
 	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
-	unsigned export_shader = key.vs.as_es;
+	unsigned export_shader;

 	shader->shader.bc.isa = rctx->isa;

@@ -220,6 +220,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		}
 		break;
 	case TGSI_PROCESSOR_VERTEX:
+		export_shader = key.vs.as_es;
 		if (rctx->b.chip_class >= EVERGREEN) {
 			if (export_shader)
 				evergreen_update_es_state(ctx, shader);
@@ -1830,8 +1831,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.shader = shader;
 	ctx.native_integers = true;

-	shader->vs_as_gs_a = key.vs.as_gs_a;
-	shader->vs_as_es = key.vs.as_es;

 	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
 			   rscreen->has_compressed_msaa_texturing);
@@ -1844,9 +1843,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->processor_type = ctx.type;
 	ctx.bc->type = shader->processor_type;

-	ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
+	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+		shader->vs_as_gs_a = key.vs.as_gs_a;
+		shader->vs_as_es = key.vs.as_es;
+	}

-	if (key.vs.as_es) {
+	ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
+
+	if (shader->vs_as_es) {
 		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
 	} else {
 		ctx.gs_for_vs = NULL;
@@ -1866,7 +1870,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	shader->nr_ps_color_exports = 0;
 	shader->nr_ps_max_color_exports = 0;

-	shader->two_side = key.ps.color_two_side;
+	if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
+		shader->two_side = key.ps.color_two_side;

 	/* register allocations */
 	/* Values [0,127] correspond to GPR[0..127].
@@ -2270,7 +2275,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	convert_edgeflag_to_int(&ctx);

 	if (ring_outputs) {
-		if (key.vs.as_es)
+		if (shader->vs_as_es)
 			emit_gs_ring_writes(&ctx, FALSE);
 	} else {
 		/* Export output */
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -1007,7 +1007,7 @@ static void r600_init_color_surface(struct r600_context *rctx,

 		/* CMASK. */
 		if (!rctx->dummy_cmask ||
-		    rctx->dummy_cmask->buf->size < cmask.size ||
+		    rctx->dummy_cmask->b.b.width0 < cmask.size ||
 		    rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
 			struct pipe_transfer *transfer;
 			void *ptr;
@@ -1025,7 +1025,7 @@ static void r600_init_color_surface(struct r600_context *rctx,

 		/* FMASK. */
 		if (!rctx->dummy_fmask ||
-		    rctx->dummy_fmask->buf->size < fmask.size ||
+		    rctx->dummy_fmask->b.b.width0 < fmask.size ||
 		    rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
 			pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
 			rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
@@ -1694,7 +1694,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (320 + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 				 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
 				 S_038008_STRIDE(vb->stride));
@@ -1743,7 +1743,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
 		radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
 		radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
 		radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
-		radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
+		radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
 		radeon_emit(cs, /* RESOURCEi_WORD2 */
 			    S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
 			    S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -195,9 +195,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 	r600_target = radeon_llvm_get_r600_target(triple);
 	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
 					   r600_get_llvm_processor_name(sscreen->b.family),
-					   sctx->b.chip_class >= VI ?
-						   "+DumpCode" :
-						   "+DumpCode,+vgpr-spilling",
+					   "+DumpCode,+vgpr-spilling",
 					   LLVMCodeGenLevelDefault,
 					   LLVMRelocDefault,
 					   LLVMCodeModelDefault);
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2300,7 +2300,7 @@ static void tex_fetch_args(
 						 lp_build_const_int32(gallivm,
 								      SI_FMASK_TEX_OFFSET), "");
 			fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
-			fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
+			fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
 		}
 	} else {
 		res_ptr = si_shader_ctx->resources[sampler_index];
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -465,7 +465,7 @@ namespace {
            const bool is_write_only = access_qual == "write_only";
            const bool is_read_only = access_qual == "read_only";

-            typename module::argument::type marg_type;
+            enum module::argument::type marg_type;
            if (is_image2d && is_read_only) {
               marg_type = module::argument::image2d_rd;
            } else if (is_image2d && is_write_only) {
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -110,7 +110,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   struct amdgpu_heap_info vram, gtt;
   struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
   uint32_t vce_version = 0, vce_feature = 0;
-   int r;
+   int r, i, j;

   /* Query hardware and driver information. */
   r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
@@ -248,7 +248,6 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   ws->info.vram_size = vram.heap_size;
   /* convert the shader clock from KHz to MHz */
   ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
-   ws->info.max_compute_units = 1; /* TODO */
   ws->info.max_se = ws->amdinfo.num_shader_engines;
   ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
   ws->info.has_uvd = uvd.available_rings != 0;
@@ -263,6 +262,18 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
   ws->info.r600_virtual_address = TRUE;
   ws->info.r600_has_dma = dma.available_rings != 0;

+   /* Guess what the maximum compute unit number is by looking at the mask
+    * of enabled CUs.
+    */
+   for (i = 0; i < ws->info.max_se; i++)
+      for (j = 0; j < ws->info.max_sh_per_se; j++) {
+         unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
+
+         if (ws->info.max_compute_units < max)
+            ws->info.max_compute_units = max;
+      }
+   ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
+
   memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
          sizeof(ws->amdinfo.gb_tile_mode));
   ws->info.si_tile_mode_array_valid = TRUE;
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -706,14 +706,30 @@ gbm_dri_bo_import(struct gbm_device *gbm,
   {
      struct gbm_import_fd_data *fd_data = buffer;
      int stride = fd_data->stride, offset = 0;
+      int dri_format;
+
+      switch (fd_data->format) {
+      case GBM_BO_FORMAT_XRGB8888:
+         dri_format = GBM_FORMAT_XRGB8888;
+         break;
+      case GBM_BO_FORMAT_ARGB8888:
+         dri_format = GBM_FORMAT_ARGB8888;
+         break;
+      default:
+         dri_format = fd_data->format;
+      }

      image = dri->image->createImageFromFds(dri->screen,
                                             fd_data->width,
                                             fd_data->height,
-                                             fd_data->format,
+                                             dri_format,
                                             &fd_data->fd, 1,
                                             &stride, &offset,
                                             NULL);
+      if (image == NULL) {
+         errno = EINVAL;
+         return NULL;
+      }
      gbm_format = fd_data->format;
      break;
   }
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -29,18 +29,7 @@ endif

 intermediates := $(call local-generated-sources-dir)

-sources := \
-	glsl_lexer.cpp \
-	glsl_parser.cpp \
-	glcpp/glcpp-lex.c \
-	glcpp/glcpp-parse.c \
-	nir/nir_builder_opcodes.h \
-	nir/nir_constant_expressions.c \
-	nir/nir_opcodes.c \
-	nir/nir_opcodes.h \
-	nir/nir_opt_algebraic.c
-
-LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
+LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)

 LOCAL_C_INCLUDES += \
 	$(intermediates)/glcpp \
@@ -51,8 +40,10 @@ LOCAL_C_INCLUDES += \
 LOCAL_EXPORT_C_INCLUDE_DIRS += \
 	$(intermediates)/nir

-sources := $(addprefix $(intermediates)/, $(sources))
-LOCAL_GENERATED_SOURCES += $(sources)
+LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
+	$(LIBGLCPP_GENERATED_FILES) \
+	$(NIR_GENERATED_FILES) \
+	$(LIBGLSL_GENERATED_CXX_FILES))

 define local-l-or-ll-to-c-or-cpp
 	@mkdir -p $(dir $@)
@@ -102,8 +93,7 @@ $(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps)
 nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
 nir_constant_expressions_deps := \
 	$(LOCAL_PATH)/nir/nir_opcodes.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.py \
-	$(LOCAL_PATH)/nir/nir_constant_expressions.h
+	$(LOCAL_PATH)/nir/nir_constant_expressions.py

 $(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
 	@mkdir -p $(dir $@)
--- a/src/glsl/Makefile.am
+++ b/src/glsl/Makefile.am
@@ -140,13 +140,16 @@ libglsl_la_SOURCES =					\
 	glsl_parser.cpp					\
 	glsl_parser.h					\
 	$(LIBGLSL_FILES)				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)
+

 libnir_la_SOURCES =					\
 	glsl_types.cpp					\
 	builtin_types.cpp				\
 	glsl_symbol_table.cpp				\
-	$(NIR_FILES)
+	$(NIR_FILES)					\
+	$(NIR_GENERATED_FILES)

 glsl_compiler_SOURCES = \
 	$(GLSL_COMPILER_CXX_FILES)
@@ -197,19 +200,23 @@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
 am__v_YACC_0 = @echo "  YACC    " $@;
 am__v_YACC_1 =

+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
+
 glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
+	$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy

 glsl_lexer.cpp: glsl_lexer.ll
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll

 glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y

 glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-	$(AM_V_at)$(MKDIR_P) glcpp
-	$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l

 # Only the parsers (specifically the header files generated at the same time)
 # need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
@@ -222,11 +229,7 @@ BUILT_SOURCES =						\
 	glsl_lexer.cpp					\
 	glcpp/glcpp-parse.c				\
 	glcpp/glcpp-lex.c				\
-	nir/nir_builder_opcodes.h				\
-	nir/nir_constant_expressions.c			\
-	nir/nir_opcodes.c				\
-	nir/nir_opcodes.h				\
-	nir/nir_opt_algebraic.c
+	$(NIR_GENERATED_FILES)
 CLEANFILES =						\
 	glcpp/glcpp-parse.h				\
 	glsl_parser.h					\
@@ -239,22 +242,24 @@ dist-hook:
 	$(RM) glcpp/tests/*.out
 	$(RM) glcpp/tests/subtest*/*.out

-nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
+nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
+
+nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@

 nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@

 nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@

 nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-	$(AM_V_at)$(MKDIR_P) nir
-	$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -69,8 +69,7 @@ NIR_FILES = \
 	nir/nir_vla.h \
 	nir/nir_worklist.c \
 	nir/nir_worklist.h \
-	nir/nir_types.cpp \
-	$(NIR_GENERATED_FILES)
+	nir/nir_types.cpp

 # libglsl

--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -145,7 +145,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
   }
 }

-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
 {
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);
@@ -155,17 +155,18 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
-      dest->reg.indirect = ralloc(mem_ctx, nir_src);
-      nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
+      dest->reg.indirect = ralloc(instr, nir_src);
+      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
 }

 void
-nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
+nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                 nir_alu_instr *instr)
 {
-   nir_src_copy(&dest->src, &src->src, mem_ctx);
+   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < 4; i++)
@@ -173,9 +174,10 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
 }

 void
-nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
+nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                  nir_alu_instr *instr)
 {
-   nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
+   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
 }
@@ -1921,14 +1923,14 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
   nir_foreach_use_safe(def, use_src) {
      nir_instr *src_parent_instr = use_src->parent_instr;
      list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, mem_ctx);
+      nir_src_copy(use_src, &new_src, src_parent_instr);
      src_add_all_uses(use_src, src_parent_instr, NULL);
   }

   nir_foreach_if_use_safe(def, use_src) {
      nir_if *src_parent_if = use_src->parent_if;
      list_del(&use_src->use_link);
-      nir_src_copy(use_src, &new_src, mem_ctx);
+      nir_src_copy(use_src, &new_src, src_parent_if);
      src_add_all_uses(use_src, NULL, src_parent_if);
   }
 }
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -580,8 +580,8 @@ nir_dest_for_reg(nir_register *reg)
   return dest;
 }

-void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
-void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
+void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
+void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);

 typedef struct {
   nir_src src;
@@ -630,10 +630,6 @@ typedef struct {
   unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
 } nir_alu_dest;

-void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
-void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
-                       void *mem_ctx);
-
 typedef enum {
   nir_type_invalid = 0, /* Not a valid type */
   nir_type_float,
@@ -702,6 +698,11 @@ typedef struct nir_alu_instr {
   nir_alu_src src[];
 } nir_alu_instr;

+void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
+                      nir_alu_instr *instr);
+void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
+                       nir_alu_instr *instr);
+
 /* is this source channel used? */
 static inline bool
 nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
--- a/src/glsl/nir/nir_from_ssa.c
+++ b/src/glsl/nir/nir_from_ssa.c
@@ -561,7 +561,7 @@ emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
      assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);

   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   nir_src_copy(&mov->src[0].src, &src, mem_ctx);
+   nir_src_copy(&mov->src[0].src, &src, mov);
   mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
   mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;

--- a/src/glsl/nir/nir_lower_alu_to_scalar.c
+++ b/src/glsl/nir/nir_lower_alu_to_scalar.c
@@ -46,11 +46,11 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
   for (unsigned i = 0; i < num_components; i++) {
      nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
      nir_alu_ssa_dest_init(chan, 1);
-      nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
+      nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
      chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
      if (nir_op_infos[chan_op].num_inputs > 1) {
         assert(nir_op_infos[chan_op].num_inputs == 2);
-         nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
+         nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
         chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
      }

@@ -153,7 +153,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
         unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
                              0 : chan);

-         nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
+         nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
         for (int j = 0; j < 4; j++)
            lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
      }
--- a/src/glsl/nir/nir_lower_atomics.c
+++ b/src/glsl/nir/nir_lower_atomics.c
@@ -91,7 +91,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
         nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
         nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
         mul->dest.write_mask = 0x1;
-         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
+         nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
         mul->src[1].src.is_ssa = true;
         mul->src[1].src.ssa = &atomic_counter_size->def;
         nir_instr_insert_before(&instr->instr, &mul->instr);
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -376,7 +376,7 @@ nir_lower_io_block(nir_block *block, void *void_state)

         store->const_index[0] = offset;

-         nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
+         nir_src_copy(&store->src[0], &intrin->src[0], store);

         if (has_indirect)
            store->src[1] = indirect;
--- a/src/glsl/nir/nir_lower_locals_to_regs.c
+++ b/src/glsl/nir/nir_lower_locals_to_regs.c
@@ -183,8 +183,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
            nir_alu_instr *add = nir_alu_instr_create(state->shader,
                                                      nir_op_iadd);
            add->src[0].src = *src.reg.indirect;
-            nir_src_copy(&add->src[1].src, &deref_array->indirect,
-                         state->shader);
+            nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
            add->dest.write_mask = 1;
            nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
            nir_instr_insert_before(instr, &add->instr);
@@ -225,7 +224,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
                                     nir_src_for_ssa(&mov->dest.dest.ssa),
                                     state->shader);
         } else {
-            nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader);
+            nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
         }
         nir_instr_insert_before(&intrin->instr, &mov->instr);

@@ -241,7 +240,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
                                             &intrin->instr, state);

         nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
-         nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader);
+         nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
         mov->dest.write_mask = (1 << intrin->num_components) - 1;
         mov->dest.dest.is_ssa = false;
         mov->dest.dest.reg.reg = reg_src.reg.reg;
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ b/src/glsl/nir/nir_lower_vec_to_movs.c
@@ -60,8 +60,8 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
   assert(src_idx < nir_op_infos[vec->op].num_inputs);

   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
-   nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
+   nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mov);

   mov->dest.write_mask = (1u << start_channel);
   mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
--- a/src/glsl/nir/nir_opt_peephole_ffma.c
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c
@@ -216,8 +216,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
         for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
            ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
      }
-      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
-                       state->mem_ctx);
+      nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);

      assert(add->dest.dest.is_ssa);

--- a/src/glsl/nir/nir_opt_peephole_select.c
+++ b/src/glsl/nir/nir_opt_peephole_select.c
@@ -195,7 +195,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
-      nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
+      nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
      /* Splat the condition to all channels */
      memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);

@@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
         assert(src->src.is_ssa);

         unsigned idx = src->pred == then_block ? 1 : 2;
-         nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
+         nir_src_copy(&sel->src[idx].src, &src->src, sel);
      }

      nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
--- a/src/glsl/opt_constant_propagation.cpp
+++ b/src/glsl/opt_constant_propagation.cpp
@@ -40,6 +40,7 @@
 #include "ir_basic_block.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"

 namespace {

@@ -95,7 +96,8 @@ public:
      killed_all = false;
      mem_ctx = ralloc_context(0);
      this->acp = new(mem_ctx) exec_list;
-      this->kills = new(mem_ctx) exec_list;
+      this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                            _mesa_key_pointer_equal);
   }
   ~ir_constant_propagation_visitor()
   {
@@ -123,7 +125,7 @@ public:
    * List of kill_entry: The masks of variables whose values were
    * killed in this block.
    */
-   exec_list *kills;
+   hash_table *kills;

   bool progress;

@@ -263,11 +265,12 @@ ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
    * main() at link time, so they're irrelevant to us.
    */
   exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
   bool orig_killed_all = this->killed_all;

   this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   this->killed_all = false;

   visit_list_elements(this, &ir->body);
@@ -352,11 +355,12 @@ void
 ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
 {
   exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
   bool orig_killed_all = this->killed_all;

   this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   this->killed_all = false;

   /* Populate the initial acp with a constant of the original */
@@ -370,12 +374,14 @@ ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
      orig_acp->make_empty();
   }

-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
   this->kills = orig_kills;
   this->acp = orig_acp;
   this->killed_all = this->killed_all || orig_killed_all;

-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
      kill(k->var, k->write_mask);
   }
 }
@@ -397,7 +403,7 @@ ir_visitor_status
 ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
 {
   exec_list *orig_acp = this->acp;
-   exec_list *orig_kills = this->kills;
+   hash_table *orig_kills = this->kills;
   bool orig_killed_all = this->killed_all;

   /* FINISHME: For now, the initial acp for loops is totally empty.
@@ -405,7 +411,8 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
    * cloned minus the killed entries after the first run through.
    */
   this->acp = new(mem_ctx) exec_list;
-   this->kills = new(mem_ctx) exec_list;
+   this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   this->killed_all = false;

   visit_list_elements(this, &ir->body_instructions);
@@ -414,12 +421,14 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
      orig_acp->make_empty();
   }

-   exec_list *new_kills = this->kills;
+   hash_table *new_kills = this->kills;
   this->kills = orig_kills;
   this->acp = orig_acp;
   this->killed_all = this->killed_all || orig_killed_all;

-   foreach_in_list(kill_entry, k, new_kills) {
+   hash_entry *htk;
+   hash_table_foreach(new_kills, htk) {
+      kill_entry *k = (kill_entry *) htk->data;
      kill(k->var, k->write_mask);
   }

@@ -448,14 +457,15 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
   /* Add this writemask of the variable to the list of killed
    * variables in this block.
    */
-   foreach_in_list(kill_entry, entry, this->kills) {
-      if (entry->var == var) {
-	 entry->write_mask |= write_mask;
-	 return;
-      }
+   hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
+   if (kill_hash_entry) {
+      kill_entry *entry = (kill_entry *) kill_hash_entry->data;
+      entry->write_mask |= write_mask;
+      return;
   }
   /* Not already in the list.  Make new entry. */
-   this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
+   _mesa_hash_table_insert(this->kills, var,
+                           new(this->mem_ctx) kill_entry(var, write_mask));
 }

 /**
--- a/src/glsl/opt_constant_variable.cpp
+++ b/src/glsl/opt_constant_variable.cpp
@@ -36,11 +36,11 @@
 #include "ir_visitor.h"
 #include "ir_optimization.h"
 #include "glsl_types.h"
+#include "util/hash_table.h"

 namespace {

 struct assignment_entry {
-   exec_node link;
   int assignment_count;
   ir_variable *var;
   ir_constant *constval;
@@ -54,31 +54,32 @@ public:
   virtual ir_visitor_status visit_enter(ir_assignment *);
   virtual ir_visitor_status visit_enter(ir_call *);

-   exec_list list;
+   struct hash_table *ht;
 };

 } /* unnamed namespace */

 static struct assignment_entry *
-get_assignment_entry(ir_variable *var, exec_list *list)
+get_assignment_entry(ir_variable *var, struct hash_table *ht)
 {
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
   struct assignment_entry *entry;

-   foreach_list_typed(struct assignment_entry, entry, link, list) {
-      if (entry->var == var)
-	 return entry;
+   if (hte) {
+      entry = (struct assignment_entry *) hte->data;
+   } else {
+      entry = (struct assignment_entry *) calloc(1, sizeof(*entry));
+      entry->var = var;
+      _mesa_hash_table_insert(ht, var, entry);
   }

-   entry = (struct assignment_entry *)calloc(1, sizeof(*entry));
-   entry->var = var;
-   list->push_head(&entry->link);
   return entry;
 }

 ir_visitor_status
 ir_constant_variable_visitor::visit(ir_variable *ir)
 {
-   struct assignment_entry *entry = get_assignment_entry(ir, &this->list);
+   struct assignment_entry *entry = get_assignment_entry(ir, this->ht);
   entry->our_scope = true;
   return visit_continue;
 }
@@ -97,7 +98,7 @@ ir_constant_variable_visitor::visit_enter(ir_assignment *ir)
   ir_constant *constval;
   struct assignment_entry *entry;

-   entry = get_assignment_entry(ir->lhs->variable_referenced(), &this->list);
+   entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht);
   assert(entry);
   entry->assignment_count++;

@@ -150,7 +151,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
 	 struct assignment_entry *entry;

 	 assert(var);
-	 entry = get_assignment_entry(var, &this->list);
+	 entry = get_assignment_entry(var, this->ht);
 	 entry->assignment_count++;
      }
   }
@@ -161,7 +162,7 @@ ir_constant_variable_visitor::visit_enter(ir_call *ir)
      struct assignment_entry *entry;

      assert(var);
-      entry = get_assignment_entry(var, &this->list);
+      entry = get_assignment_entry(var, this->ht);
      entry->assignment_count++;
   }

@@ -177,20 +178,22 @@ do_constant_variable(exec_list *instructions)
   bool progress = false;
   ir_constant_variable_visitor v;

+   v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+                                  _mesa_key_pointer_equal);
   v.run(instructions);

-   while (!v.list.is_empty()) {
-
-      struct assignment_entry *entry;
-      entry = exec_node_data(struct assignment_entry, v.list.head, link);
+   struct hash_entry *hte;
+   hash_table_foreach(v.ht, hte) {
+      struct assignment_entry *entry = (struct assignment_entry *) hte->data;

      if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
 	 entry->var->constant_value = entry->constval;
 	 progress = true;
      }
-      entry->link.remove();
+      hte->data = NULL;
      free(entry);
   }
+   _mesa_hash_table_destroy(v.ht, NULL);

   return progress;
 }
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -50,19 +50,14 @@ AM_CPPFLAGS =							\

 include Makefile.sources

+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
 glapi_gen_mapi_deps := \
 	mapi_abi.py \
 	$(wildcard glapi/gen/*.xml) \
 	$(wildcard glapi/gen/*.py)

-# $(1): path to an XML file
-# $(2): name of the printer
-define glapi_gen_mapi
-@$(MKDIR_P) $(dir $@)
-$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/mapi_abi.py \
-	--mode lib --printer $(2) $(1) > $@
-endef
-
 if HAVE_SHARED_GLAPI
 BUILT_SOURCES += shared-glapi/glapi_mapi_tmp.h

@@ -93,7 +88,9 @@ shared_glapi_test_LDADD = \
 endif

 shared-glapi/glapi_mapi_tmp.h : glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,shared-glapi)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer shared-glapi \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

 if HAVE_OPENGL
 noinst_LTLIBRARIES = glapi/libglapi.la
@@ -185,7 +182,9 @@ endif
 endif

 es1api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es1api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es1api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

 if HAVE_OPENGL_ES2
 TESTS += es2api/ABI-check
@@ -229,6 +228,8 @@ endif
 endif

 es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
-	$(call glapi_gen_mapi,$<,es2api)
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/mapi_abi.py --mode lib --printer es2api \
+		$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

 include $(top_srcdir)/install-lib-links.mk
--- a/src/mapi/glapi/gen/Makefile.am
+++ b/src/mapi/glapi/gen/Makefile.am
@@ -20,7 +20,7 @@ XORG_INDENT_FLAGS = -linux -bad -bap -blf -bli0 -cbi0 -cdw -nce -cs -i4 -lc80 -p

 MESA_DIR = $(top_builddir)/src/mesa
 MESA_GLAPI_DIR = $(top_builddir)/src/mapi/glapi
-MESA_MAPI_DIR = $(top_builddir)/src/mapi
+MESA_MAPI_DIR = $(top_srcdir)/src/mapi
 MESA_GLX_DIR = $(top_builddir)/src/glx

 MESA_GLAPI_OUTPUTS = \
@@ -209,7 +209,7 @@ COMMON = $(API_XML) \

 COMMON_GLX = $(COMMON) glX_API.xml glX_XML.py glX_proto_common.py

-PYTHON_GEN = $(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS)
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

 ######################################################################

@@ -241,65 +241,65 @@ $(XORG_GLAPI_DIR)/%.h: $(MESA_GLAPI_DIR)/%.h
 ######################################################################

 $(MESA_GLAPI_DIR)/glapi_mapi_tmp.h: $(MESA_MAPI_DIR)/mapi_abi.py $(COMMON)
-	$(PYTHON_GEN) $< \
+	$(PYTHON_GEN) $(MESA_MAPI_DIR)/mapi_abi.py \
 		--printer glapi --mode lib $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glprocs.h: gl_procs.py $(COMMON)
-	$(PYTHON_GEN) $< -c -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_procs.py -c -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapitemp.h: gl_apitemp.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_apitemp.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapitable.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapi_gentable.c: gl_gentable.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_gentable.py -f $(srcdir)/gl_and_es_API.xml > $@

 ######################################################################

 $(MESA_GLAPI_DIR)/glapi_x86.S: gl_x86_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86_asm.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapi_x86-64.S: gl_x86-64_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_x86-64_asm.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_GLAPI_DIR)/glapi_sparc.S: gl_SPARC_asm.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_SPARC_asm.py -f $(srcdir)/gl_and_es_API.xml > $@

 ######################################################################

 $(MESA_DIR)/main/enums.c: gl_enums.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_enums.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_DIR)/main/api_exec.c: gl_genexec.py apiexec.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/gl_genexec.py -f $(srcdir)/gl_and_es_API.xml > $@

 $(MESA_DIR)/main/dispatch.h: gl_table.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@
+	$(PYTHON_GEN) $(srcdir)/gl_table.py -f $(srcdir)/gl_and_es_API.xml -m remap_table > $@

 $(MESA_DIR)/main/remap_helper.h: remap_helper.py $(COMMON)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_and_es_API.xml > $@
+	$(PYTHON_GEN) $(srcdir)/remap_helper.py -f $(srcdir)/gl_and_es_API.xml > $@

 ######################################################################

 $(MESA_GLX_DIR)/indirect.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m proto \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m proto \
 	  | $(INDENT) $(INDENT_FLAGS) > $@

 $(MESA_GLX_DIR)/indirect.h: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_h > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_h > $@

 $(MESA_GLX_DIR)/indirect_init.c: glX_proto_send.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m init_c > $@
+	$(PYTHON_GEN) $(srcdir)/glX_proto_send.py -f $(srcdir)/gl_API.xml -m init_c > $@

 $(MESA_GLX_DIR)/indirect_size.h $(XORG_GLX_DIR)/indirect_size.h: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_h --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_h --only-set \
 	    --header-tag _INDIRECT_SIZE_H_ \
 	  | $(INDENT) $(INDENT_FLAGS) > $@

 $(MESA_GLX_DIR)/indirect_size.c: glX_proto_size.py $(COMMON_GLX)
-	$(PYTHON_GEN) $< -f $(srcdir)/gl_API.xml -m size_c --only-set \
+	$(PYTHON_GEN) $(srcdir)/glX_proto_size.py -f $(srcdir)/gl_API.xml -m size_c --only-set \
 	  | $(INDENT) $(INDENT_FLAGS) > $@

 ######################################################################
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -90,37 +90,24 @@ CLEANFILES = \
 	program/program_parse.tab.h \
 	main/git_sha1.h.tmp

-GET_HASH_GEN = main/get_hash_generator.py
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py 	\
-		 $(GET_HASH_GEN)
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/$(GET_HASH_GEN)		\
-		-f $< > $@.tmp;						\
-	mv $@.tmp $@;
+main/get_hash.h: ../mapi/glapi/gen/gl_and_es_API.xml main/get_hash_params.py \
+                 main/get_hash_generator.py
+	$(PYTHON_GEN) $(srcdir)/main/get_hash_generator.py \
+		-f $(srcdir)/../mapi/glapi/gen/gl_and_es_API.xml > $@

-main/format_info.h: main/formats.csv                                    \
+main/format_info.h: main/formats.csv \
                    main/format_parser.py main/format_info.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/main/format_info.py        \
-                   $< > $@.tmp;                                         \
-	mv $@.tmp $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_info.py $(srcdir)/main/formats.csv > $@

-main/format_pack.c: main/format_pack.py main/formats.csv		\
+main/format_pack.c: main/format_pack.py main/formats.csv \
                    main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_pack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_pack.py $(srcdir)/main/formats.csv > $@

 main/format_unpack.c: main/format_unpack.py main/formats.csv	\
                      main/format_parser.py
-	$(AM_V_GEN)set -e;						\
-	$(PYTHON2) $(PYTHON_FLAGS)					\
-			$(srcdir)/main/format_unpack.py			\
-			$(srcdir)/main/formats.csv			\
-		| $(INDENT) $(INDENT_FLAGS) > $@;
+	$(PYTHON_GEN) $(srcdir)/main/format_unpack.py $(srcdir)/main/formats.csv > $@

 main/formats.c: main/format_info.h

@@ -201,13 +188,17 @@ libmesa_sse41_la_CFLAGS = $(AM_CFLAGS) $(SSE41_CFLAGS)
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = gl.pc

+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+YACC_GEN = $(AM_V_GEN)$(YACC) $(YFLAGS)
+LEX_GEN = $(AM_V_GEN)$(LEX) $(LFLAGS)
+
 program/lex.yy.c: program/program_lexer.l
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(LEX) --never-interactive --outfile=$@ $<
+	$(MKDIR_GEN)
+	$(LEX_GEN) -o $@ $(srcdir)/program/program_lexer.l

 program/program_parse.tab.c program/program_parse.tab.h: program/program_parse.y
-	$(AM_V_at)$(MKDIR_P) program
-	$(AM_V_GEN) $(YACC) -p "_mesa_program_" -v -d --output=program/program_parse.tab.c $<
+	$(MKDIR_GEN)
+	$(YACC_GEN) -o $@ -p "_mesa_program_" --defines=$(builddir)/program/program_parse.tab.h $(srcdir)/program/program_parse.y

 if GEN_ASM_OFFSETS
 matypes.h: $(gen_matypes_SOURCES)
--- a/src/mesa/drivers/common/meta_generate_mipmap.c
+++ b/src/mesa/drivers/common/meta_generate_mipmap.c
@@ -163,7 +163,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
   const GLuint maxLevel = texObj->MaxLevel;
   const GLint maxLevelSave = texObj->MaxLevel;
   const GLboolean genMipmapSave = texObj->GenerateMipmap;
-   const GLuint currentTexUnitSave = ctx->Texture.CurrentUnit;
   const GLboolean use_glsl_version = ctx->Extensions.ARB_vertex_shader &&
                                      ctx->Extensions.ARB_fragment_shader;
   GLenum faceTarget;
@@ -202,8 +201,12 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
   samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
      ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;

-   if (currentTexUnitSave != 0)
-      _mesa_BindTexture(target, texObj->Name);
+   /* We may have been called from glGenerateTextureMipmap with CurrentUnit
+    * still set to 0, so we don't know when we can skip binding the texture.
+    * Assume that _mesa_BindTexture will be fast if we're rebinding the same
+    * texture.
+    */
+   _mesa_BindTexture(target, texObj->Name);

   if (!mipmap->Sampler) {
      _mesa_GenSamplers(1, &mipmap->Sampler);
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -45,6 +45,24 @@
 #include "uniforms.h"
 #include "varray.h"

+static bool
+need_signed_unsigned_int_conversion(mesa_format mesaFormat,
+                                    GLenum format, GLenum type)
+{
+   const GLenum mesaFormatType = _mesa_get_format_datatype(mesaFormat);
+   const bool is_format_integer = _mesa_is_enum_format_integer(format);
+   return (mesaFormatType == GL_INT &&
+           is_format_integer &&
+           (type == GL_UNSIGNED_INT ||
+            type == GL_UNSIGNED_SHORT ||
+            type == GL_UNSIGNED_BYTE)) ||
+          (mesaFormatType == GL_UNSIGNED_INT &&
+           is_format_integer &&
+           (type == GL_INT ||
+            type == GL_SHORT ||
+            type == GL_BYTE));
+}
+
 static struct gl_texture_image *
 create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
                       GLenum pbo_target, int width, int height,
@@ -166,6 +184,13 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
   if (ctx->_ImageTransferState)
      return false;

+   /* This function rely on BlitFramebuffer to fill in the pixel data for
+    * glTex[Sub]Image*D. But, BlitFrameBuffer doesn't support signed to
+    * unsigned or unsigned to signed integer conversions.
+    */
+   if (need_signed_unsigned_int_conversion(tex_image->TexFormat, format, type))
+      return false;
+
   /* For arrays, use a tall (height * depth) 2D texture but taking into
    * account the inter-image padding specified with the image height packing
    * property.
@@ -250,24 +275,6 @@ fail:
   return success;
 }

-static bool
-need_signed_unsigned_int_conversion(mesa_format rbFormat,
-                                    GLenum format, GLenum type)
-{
-   const GLenum srcType = _mesa_get_format_datatype(rbFormat);
-   const bool is_dst_format_integer = _mesa_is_enum_format_integer(format);
-   return (srcType == GL_INT &&
-           is_dst_format_integer &&
-           (type == GL_UNSIGNED_INT ||
-            type == GL_UNSIGNED_SHORT ||
-            type == GL_UNSIGNED_BYTE)) ||
-          (srcType == GL_UNSIGNED_INT &&
-           is_dst_format_integer &&
-           (type == GL_INT ||
-            type == GL_SHORT ||
-            type == GL_BYTE));
-}
-
 bool
 _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                              struct gl_texture_image *tex_image,
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
@@ -67,7 +67,7 @@ CLEANFILES = \
 	$(MOS)

 # Default target options.h
-options.h: LOCALEDIR := .
+LOCALEDIR := .
 options.h: t_options.h $(MOS)
 	$(AM_V_GEN) $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/gen_xmlpool.py $(srcdir)/t_options.h $(LOCALEDIR) $(LANGS) > options.h

--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -251,7 +251,7 @@ intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
         continue;

      intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
-                                                     start + length, prim);
+                                                     length, prim);
   }

   tnl->Driver.Render.Finish(ctx);
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -215,6 +215,10 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
   struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
   struct intel_texture_image *intel_image = intel_texture_image(dst_image);

+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
   /* Sync up the state of window system buffers.  We need to do this before
    * we go looking at the src renderbuffer's miptree.
    */
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -323,6 +323,15 @@ brw_initialize_context_constants(struct brw_context *brw)

   ctx->Const.StripTextureBorder = true;

+   ctx->Const.MaxUniformBlockSize = 65536;
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_program_constants *prog = &ctx->Const.Program[i];
+      prog->MaxUniformBlocks = 12;
+      prog->MaxCombinedUniformComponents =
+         prog->MaxUniformComponents +
+         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
+   }
+
   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
--- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -43,6 +43,7 @@
 #include "glsl/ir_visitor.h"
 #include "glsl/ir_rvalue_visitor.h"
 #include "glsl/glsl_types.h"
+#include "util/hash_table.h"

 static bool debug = false;

@@ -72,7 +73,8 @@ public:
   ir_vector_reference_visitor(void)
   {
      this->mem_ctx = ralloc_context(NULL);
-      this->variable_list.make_empty();
+      this->ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
+                                         _mesa_key_pointer_equal);
   }

   ~ir_vector_reference_visitor(void)
@@ -89,7 +91,7 @@ public:
   variable_entry *get_variable_entry(ir_variable *var);

   /* List of variable_entry */
-   exec_list variable_list;
+   struct hash_table *ht;

   void *mem_ctx;
 };
@@ -119,13 +121,12 @@ ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
      break;
   }

-   foreach_in_list(variable_entry, entry, &variable_list) {
-      if (entry->var == var)
-	 return entry;
-   }
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   if (hte)
+      return (struct variable_entry *) hte->data;

   variable_entry *entry = new(mem_ctx) variable_entry(var);
-   this->variable_list.push_tail(entry);
+   _mesa_hash_table_insert(ht, var, entry);
   return entry;
 }

@@ -195,9 +196,9 @@ ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)

 class ir_vector_splitting_visitor : public ir_rvalue_visitor {
 public:
-   ir_vector_splitting_visitor(exec_list *vars)
+   ir_vector_splitting_visitor(struct hash_table *vars)
   {
-      this->variable_list = vars;
+      this->ht = vars;
   }

   virtual ir_visitor_status visit_leave(ir_assignment *);
@@ -205,7 +206,7 @@ public:
   void handle_rvalue(ir_rvalue **rvalue);
   variable_entry *get_splitting_entry(ir_variable *var);

-   exec_list *variable_list;
+   struct hash_table *ht;
 };

 variable_entry *
@@ -216,13 +217,8 @@ ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
   if (!var->type->is_vector())
      return NULL;

-   foreach_in_list(variable_entry, entry, variable_list) {
-      if (entry->var == var) {
-	 return entry;
-      }
-   }
-
-   return NULL;
+   struct hash_entry *hte = _mesa_hash_table_search(ht, var);
+   return hte ? (struct variable_entry *) hte->data : NULL;
 }

 void
@@ -329,12 +325,15 @@ ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
 bool
 brw_do_vector_splitting(exec_list *instructions)
 {
+   struct hash_entry *hte;
+
   ir_vector_reference_visitor refs;

   visit_list_elements(&refs, instructions);

   /* Trim out variables we can't split. */
-   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
      if (debug) {
 	 fprintf(stderr, "vector %s@%p: whole_access %d\n",
                 entry->var->name, (void *) entry->var,
@@ -342,11 +341,11 @@ brw_do_vector_splitting(exec_list *instructions)
      }

      if (entry->whole_vector_access) {
-	 entry->remove();
+         _mesa_hash_table_remove(refs.ht, hte);
      }
   }

-   if (refs.variable_list.is_empty())
+   if (refs.ht->entries == 0)
      return false;

   void *mem_ctx = ralloc_context(NULL);
@@ -354,7 +353,8 @@ brw_do_vector_splitting(exec_list *instructions)
   /* Replace the decls of the vectors to be split with their split
    * components.
    */
-   foreach_in_list(variable_entry, entry, &refs.variable_list) {
+   hash_table_foreach(refs.ht, hte) {
+      struct variable_entry *entry = (struct variable_entry *) hte->data;
      const struct glsl_type *type;
      type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);

@@ -378,7 +378,7 @@ brw_do_vector_splitting(exec_list *instructions)
      entry->var->remove();
   }

-   ir_vector_splitting_visitor split(&refs.variable_list);
+   ir_vector_splitting_visitor split(refs.ht);
   visit_list_elements(&split, instructions);

   ralloc_free(mem_ctx);
--- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
@@ -48,6 +48,7 @@ public:

 private:
   void emit(ir_variable *, ir_rvalue *);
+   ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
 };

 /**
@@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
   base_ir->insert_before(assign(var, value));
 }

+/**
+ * Emit a temporary variable declaration
+ */
+ir_variable *
+lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
+{
+   ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
+   base_ir->insert_before(var);
+   return var;
+}
+
 static const glsl_type *
 txs_type(const glsl_type *type)
 {
@@ -144,28 +156,179 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
      new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
   emit(dPdy, mul(size, ir->lod_info.grad.dPdy));

-   /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
-   ir_rvalue *rho;
-   if (dPdx->type->is_scalar()) {
-      rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
-			       expr(ir_unop_abs, dPdy));
-   } else {
-      rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
-			       expr(ir_unop_sqrt, dot(dPdy, dPdy)));
-   }
-
-   /* lambda_base = log2(rho).  We're ignoring GL state biases for now.
-    *
-    * For cube maps the result of these formulas is giving us a value of rho
-    * that is twice the value we should use, so divide it by 2 or,
-    * alternatively, remove one unit from the result of the log2 computation.
-    */
   ir->op = ir_txl;
   if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
-      ir->lod_info.lod = expr(ir_binop_add,
-                              expr(ir_unop_log2, rho),
-                              new(mem_ctx) ir_constant(-1.0f));
+      /* Cubemap texture lookups first generate a texture coordinate normalized
+       * to [-1, 1] on the appropiate face. The appropiate face is determined
+       * by which component has largest magnitude and its sign. The texture
+       * coordinate is the quotient of the remaining texture coordinates against
+       * that absolute value of the component of largest magnitude. This
+       * division requires that the computing of the derivative of the texel
+       * coordinate must use the quotient rule. The high level GLSL code is as
+       * follows:
+       *
+       * Step 1: selection
+       *
+       * vec3 abs_p, Q, dQdx, dQdy;
+       * abs_p = abs(ir->coordinate);
+       * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
+       *    Q = ir->coordinate.yzx;
+       *    dQdx = ir->lod_info.grad.dPdx.yzx;
+       *    dQdy = ir->lod_info.grad.dPdy.yzx;
+       * }
+       * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
+       *    Q = ir->coordinate.xzy;
+       *    dQdx = ir->lod_info.grad.dPdx.xzy;
+       *    dQdy = ir->lod_info.grad.dPdy.xzy;
+       * }
+       * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
+       *    Q = ir->coordinate;
+       *    dQdx = ir->lod_info.grad.dPdx;
+       *    dQdy = ir->lod_info.grad.dPdy;
+       * }
+       *
+       * Step 2: use quotient rule to compute derivative. The normalized to
+       * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
+       * only concerned with the magnitudes of the derivatives whose values are
+       * not affected by the sign. We drop the sign from the computation.
+       *
+       * vec2 dx, dy;
+       * float recip;
+       *
+       * recip = 1.0 / Q.z;
+       * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
+       * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
+       *
+       * Step 3: compute LOD. At this point we have the derivatives of the
+       * texture coordinates normalized to [-1,1]. We take the LOD to be
+       *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
+       *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
+       *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
+       *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
+       *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
+       * where L is the dimension of the cubemap. The code is:
+       *
+       * float M, result;
+       * M = max(dot(dx, dx), dot(dy, dy));
+       * L = textureSize(sampler, 0).x;
+       * result = -1.0 + 0.5 * log2(L * L * M);
+       */
+
+/* Helpers to make code more human readable. */
+#define EMIT(instr) base_ir->insert_before(instr)
+#define THEN(irif, instr) irif->then_instructions.push_tail(instr)
+#define CLONE(x) x->clone(mem_ctx, NULL)
+
+      ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
+
+      EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
+
+      ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
+      ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
+      ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
+
+      /* unmodified dPdx, dPdy values */
+      ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
+      ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
+
+      /* 1. compute selector */
+
+      /* if (abs_p.x >= max(abs_p.y, abs_p.z))  ... */
+      ir_if *branch_x =
+         new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
+                                   max2(swizzle_y(abs_p), swizzle_z(abs_p))));
+
+      /* Q = p.yzx;
+       * dQdx = dPdx.yzx;
+       * dQdy = dPdy.yzx;
+       */
+      int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
+      THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
+      THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
+      THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
+      EMIT(branch_x);
+
+      /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
+      ir_if *branch_y =
+         new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
+                                   max2(swizzle_x(abs_p), swizzle_z(abs_p))));
+
+      /* Q = p.xzy;
+       * dQdx = dPdx.xzy;
+       * dQdy = dPdy.xzy;
+       */
+      int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
+      THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
+      THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
+      THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
+      EMIT(branch_y);
+
+      /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
+      ir_if *branch_z =
+         new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
+                            max2(swizzle_x(abs_p), swizzle_y(abs_p))));
+
+      /* Q = p;
+       * dQdx = dPdx;
+       * dQdy = dPdy;
+       */
+      THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
+      THEN(branch_z, assign(dQdx, CLONE(dPdx)));
+      THEN(branch_z, assign(dQdy, CLONE(dPdy)));
+      EMIT(branch_z);
+
+      /* 2. quotient rule */
+      ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
+      EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q))));
+
+      ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
+      ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
+
+      /* tmp = Q.xy * recip;
+       * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
+       * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
+       */
+      ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
+      EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
+      EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
+                                     mul(tmp, swizzle_z(dQdx))))));
+      EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
+                                     mul(tmp, swizzle_z(dQdy))))));
+
+      /* M = max(dot(dx, dx), dot(dy, dy)); */
+      ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
+      EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
+
+      /* size has textureSize() of LOD 0 */
+      ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
+      EMIT(assign(L, swizzle_x(size)));
+
+      ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
+
+      /* result = -1.0 + 0.5 * log2(L * L * M); */
+      EMIT(assign(result,
+                  add(new(mem_ctx)ir_constant(-1.0f),
+                      mul(new(mem_ctx)ir_constant(0.5f),
+                          expr(ir_unop_log2, mul(mul(L, L), M))))));
+
+      /* 3. final assignment of parameters to textureLod call */
+      ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
+
+#undef THEN
+#undef EMIT
+
   } else {
+      /* Calculate rho from equation 3.20 of the GL 3.0 specification. */
+      ir_rvalue *rho;
+      if (dPdx->type->is_scalar()) {
+         rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
+                    expr(ir_unop_abs, dPdy));
+      } else {
+         rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
+                    expr(ir_unop_sqrt, dot(dPdy, dPdy)));
+      }
+
+      /* lambda_base = log2(rho).  We're ignoring GL state biases for now. */
      ir->lod_info.lod = expr(ir_unop_log2, rho);
   }

--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -950,6 +950,14 @@ vec4_instruction::can_reswizzle(int dst_writemask,
   if (mlen > 0)
      return false;

+   /* We can't use swizzles on the accumulator and that's really the only
+    * HW_REG we would care to reswizzle so just disallow them all.
+    */
+   for (int i = 0; i < 3; i++) {
+      if (src[i].file == HW_REG)
+         return false;
+   }
+
   return true;
 }

@@ -1053,6 +1061,17 @@ vec4_visitor::opt_register_coalesce()
               }
            }

+            /* This doesn't handle saturation on the instruction we
+             * want to coalesce away if the register types do not match.
+             * But if scan_inst is a non type-converting 'mov', we can fix
+             * the types later.
+             */
+            if (inst->saturate &&
+                inst->dst.type != scan_inst->dst.type &&
+                !(scan_inst->opcode == BRW_OPCODE_MOV &&
+                  scan_inst->dst.type == scan_inst->src[0].type))
+               break;
+
            /* If we can't handle the swizzle, bail. */
            if (!scan_inst->can_reswizzle(inst->dst.writemask,
                                          inst->src[0].swizzle,
@@ -1128,6 +1147,16 @@ vec4_visitor::opt_register_coalesce()
 	       scan_inst->dst.file = inst->dst.file;
 	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->dst.reg_offset = inst->dst.reg_offset;
+               if (inst->saturate &&
+                   inst->dst.type != scan_inst->dst.type) {
+                  /* If we have reached this point, scan_inst is a non
+                   * type-converting 'mov' and we can modify its register types
+                   * to match the ones in inst. Otherwise, we could have an
+                   * incorrect saturation result.
+                   */
+                  scan_inst->dst.type = inst->dst.type;
+                  scan_inst->src[0].type = inst->src[0].type;
+               }
 	       scan_inst->saturate |= inst->saturate;
 	    }
 	    scan_inst = (vec4_instruction *)scan_inst->next;
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -456,7 +456,7 @@ void
 vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
 {
   dst_reg reg = dst_reg(GRF, alloc.allocate(1));
-   reg.type =  BRW_REGISTER_TYPE_F;
+   reg.type =  BRW_REGISTER_TYPE_D;

   unsigned remaining = brw_writemask_for_size(instr->def.num_components);

@@ -477,7 +477,7 @@ vec4_visitor::nir_emit_load_const(nir_load_const_instr *instr)
      }

      reg.writemask = writemask;
-      emit(MOV(reg, src_reg(instr->value.f[i])));
+      emit(MOV(reg, src_reg(instr->value.i[i])));

      remaining &= ~writemask;
   }
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -55,6 +55,10 @@ intel_copy_texsubimage(struct brw_context *brw,
   const GLenum internalFormat = intelImage->base.Base.InternalFormat;
   bool ret;

+   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
+   if (brw->ctx._ImageTransferState)
+      return false;
+
   intel_prepare_render(brw);

   /* glCopyTexSubImage() can be called on a multisampled renderbuffer (if
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -446,7 +446,7 @@ static GLboolean radeon_run_render( struct gl_context *ctx,
 		 start, start+length);

      if (length)
-	 tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
+         tab[prim & PRIM_MODE_MASK](ctx, start, length, prim);
   }

   tnl->Driver.Render.Finish( ctx );
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3595,7 +3595,16 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,

   switch (pname) {
   case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT:
-      *params = _mesa_is_winsys_fbo(buffer)
+      /* From the OpenGL spec, 9.2. Binding and Managing Framebuffer Objects:
+       *
+       * "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
+       *  either no framebuffer is bound to target; or the default framebuffer
+       *  is bound, attachment is DEPTH or STENCIL, and the number of depth or
+       *  stencil bits, respectively, is zero."
+       */
+      *params = (_mesa_is_winsys_fbo(buffer) &&
+                 ((attachment != GL_DEPTH && attachment != GL_STENCIL) ||
+                  (att->Type != GL_NONE)))
         ? GL_FRAMEBUFFER_DEFAULT : att->Type;
      return;
   case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT:
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -494,7 +494,8 @@ _mesa_bytes_per_pixel(GLenum format, GLenum type)
      else
         return -1;
   case GL_UNSIGNED_INT_24_8_EXT:
-      if (format == GL_DEPTH_STENCIL_EXT)
+      if (format == GL_DEPTH_COMPONENT ||
+          format == GL_DEPTH_STENCIL_EXT)
         return sizeof(GLuint);
      else
         return -1;
@@ -1691,6 +1692,10 @@ _mesa_error_check_format_and_type(const struct gl_context *ctx,
      return GL_INVALID_OPERATION;

   case GL_UNSIGNED_INT_24_8:
+      /* Depth buffer OK to read in OpenGL ES (NV_read_depth). */
+      if (ctx->API == API_OPENGLES2 && format == GL_DEPTH_COMPONENT)
+         return GL_NO_ERROR;
+
      if (format != GL_DEPTH_STENCIL) {
         return GL_INVALID_OPERATION;
      }
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -963,6 +963,7 @@ read_pixels_es3_error_check(GLenum format, GLenum type,
            return GL_NO_ERROR;
         break;
      case GL_UNSIGNED_SHORT:
+      case GL_UNSIGNED_INT:
      case GL_UNSIGNED_INT_24_8:
         if (!is_float_depth)
            return GL_NO_ERROR;
--- a/src/mesa/main/texcompress_bptc.c
+++ b/src/mesa/main/texcompress_bptc.c
@@ -1291,7 +1291,8 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -130,7 +130,8 @@ _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
--- a/src/mesa/main/texcompress_rgtc.c
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -196,9 +196,11 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
          dstFormat == MESA_FORMAT_LA_LATC2_UNORM);

   if (baseInternalFormat == GL_RG)
-      tempFormat = MESA_FORMAT_R8G8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_R8G8_UNORM
+                                         : MESA_FORMAT_G8R8_UNORM;
   else
-      tempFormat = MESA_FORMAT_L8A8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_L8A8_UNORM
+                                         : MESA_FORMAT_A8L8_UNORM;

   rgRowStride = 2 * srcWidth * sizeof(GLubyte);
   tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLubyte));
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -198,7 +198,8 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
@@ -255,7 +256,8 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
@@ -311,7 +313,8 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -274,8 +274,8 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
         return NULL;
      size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);

-      f = ((base * 8) / desc->block.bits) * desc->block.width;
-      n = ((size * 8) / desc->block.bits) * desc->block.width;
+      f = (base / (desc->block.bits / 8)) * desc->block.width;
+      n = (size / (desc->block.bits / 8)) * desc->block.width;
      if (!n)
         return NULL;
      templ.u.buf.first_element = f;
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -873,8 +873,13 @@ void st_init_extensions(struct pipe_screen *screen,

   consts->MaxViewports = screen->get_param(screen, PIPE_CAP_MAX_VIEWPORTS);
   if (consts->MaxViewports >= 16) {
-      consts->ViewportBounds.Min = -16384.0;
-      consts->ViewportBounds.Max = 16384.0;
+      if (glsl_feature_level >= 400) {
+         consts->ViewportBounds.Min = -32768.0;
+         consts->ViewportBounds.Max = 32767.0;
+      } else {
+         consts->ViewportBounds.Min = -16384.0;
+         consts->ViewportBounds.Max = 16383.0;
+      }
      extensions->ARB_viewport_array = GL_TRUE;
      extensions->ARB_fragment_layer_viewport = GL_TRUE;
      if (extensions->AMD_vertex_shader_layer)
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1270,46 +1270,40 @@ static const struct format_mapping format_map[] = {
   /* 32-bit float formats */
   {
      { GL_RGBA32F_ARB, 0 },
-      { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_RGB32F_ARB, 0 },
      { PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32X32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_LUMINANCE_ALPHA32F_ARB, 0 },
-      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_ALPHA32F_ARB, 0 },
      { PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_A16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_INTENSITY32F_ARB, 0 },
      { PIPE_FORMAT_I32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_I16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_LUMINANCE32F_ARB, 0 },
      { PIPE_FORMAT_L32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_L16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_R32F, 0 },
      { PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_RG32F, 0 },
-      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },

   /* R, RG formats */
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -121,9 +121,9 @@ static void TAG(render_points_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -148,7 +148,7 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,

      /* Emit whole number of lines in total and in each buffer:
       */
-      count -= (count-start) & 1;
+      count -= count & 1;
      currentsz = GET_CURRENT_VB_MAX_VERTS();
      currentsz -= currentsz & 1;
      dmasz -= dmasz & 1;
@@ -156,9 +156,9 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -186,9 +186,9 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j + 1 < count; j += nr - 1 ) {
+      for (j = 0; j + 1 < count; j += nr - 1 ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }
 
@@ -214,10 +214,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,

      INIT( GL_LINE_STRIP );

-      if (flags & PRIM_BEGIN)
-	 j = start;
-      else
-	 j = start + 1;
+      j = (flags & PRIM_BEGIN) ? 0 : 1;

      /* Ensure last vertex won't wrap buffers:
       */
@@ -234,23 +231,23 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );

 	    if (j + nr >= count &&
-		start < count - 1 && 
+		count > 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
 	       tmp = ALLOC_VERTS(nr+1);
-	       tmp = TAG(emit_verts)( ctx, j, nr, tmp );
+               tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
 	       tmp = TAG(emit_verts)( ctx, start, 1, tmp );
 	       (void) tmp;
 	    }
 	    else {
-	       TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+               TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	       currentsz = dmasz;
 	    }
 	 }

      }
-      else if (start + 1 < count && (flags & PRIM_END)) {
+      else if (count > 1 && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_VERTS(2);
 	 tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
@@ -284,14 +281,14 @@ static void TAG(render_triangles_verts)( struct gl_context *ctx,
   /* Emit whole number of tris in total.  dmasz is already a multiple
    * of 3.
    */
-   count -= (count-start)%3;
+   count -= count % 3;

   if (currentsz < 8)
      currentsz = dmasz;

-   for (j = start; j < count; j += nr) {
+   for (j = 0; j < count; j += nr) {
      nr = MIN2( currentsz, count - j );
-      TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
      currentsz = dmasz;
   }
 }
@@ -322,9 +319,9 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
      dmasz -= (dmasz & 1);
      currentsz -= (currentsz & 1);

-      for (j = start ; j + 2 < count; j += nr - 2 ) {
+      for (j = 0; j + 2 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -354,12 +351,12 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_VERTS( nr );
 	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
+         tmp = TAG(emit_verts)( ctx, start + j, nr - 1, tmp );
 	 (void) tmp;
 	 currentsz = dmasz;
      }
@@ -394,12 +391,12 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count ; j += nr - 2 ) {
+      for (j = 1 ; j + 1 < count ; j += nr - 2 ) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_VERTS( nr );
 	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
+         tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
 	 (void) tmp;
 	 currentsz = dmasz;
      }
@@ -437,9 +434,9 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
      dmasz -= (dmasz & 2);
      currentsz -= (currentsz & 2);

-      for (j = start ; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2 ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -465,7 +462,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 /* Emit whole number of quads in total, and in each buffer.
 	  */
 	 dmasz -= dmasz & 1;
-	 count -= (count-start) & 1;
+	 count -= count & 1;
 	 currentsz -= currentsz & 1;

 	 if (currentsz < 12)
@@ -474,14 +471,14 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 currentsz = currentsz/6*2;
 	 dmasz = dmasz/6*2;

-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+	 for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
 	    if (nr >= 4) {
 	       GLint quads = (nr/2)-1;
 	       GLint i;
 	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );

-	       for ( i = j-start ; i < j-start+quads*2 ; i+=2 ) {
+               for (i = j; i < j + quads * 2; i += 2) {
 		  EMIT_TWO_ELTS( 0, (i+0), (i+1) );
 		  EMIT_TWO_ELTS( 2, (i+2), (i+1) );
 		  EMIT_TWO_ELTS( 4, (i+3), (i+2) );
@@ -519,15 +516,15 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
      dmasz -= dmasz & 1;
      currentsz = GET_CURRENT_VB_MAX_VERTS();
      currentsz -= currentsz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;

      if (currentsz < 8) {
 	 currentsz = dmasz;
      }

-      for (j = start; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -545,6 +542,9 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
 				     GLuint count,
 				     GLuint flags )
 {
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
+
   if (HAVE_QUADS) {
      LOCAL_VARS;
      int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/4) * 4;
@@ -553,18 +553,13 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,

      INIT(GL_QUADS);

-      /* Emit whole number of quads in total.  dmasz is already a multiple
-       * of 4.
-       */
-      count -= (count-start)%4;
-
      currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
      if (currentsz < 8)
         currentsz = dmasz;

-      for (j = start; j < count; j += nr) {
+      for (j = 0; j < count; j += nr) {
         nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
         currentsz = dmasz;
      }
   }
@@ -587,7 +582,6 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
      /* Emit whole number of quads in total, and in each buffer.
       */
      dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
      currentsz -= currentsz & 3;

      /* Adjust for rendering as triangles:
@@ -598,14 +592,14 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
 	 if (nr >= 4) {
 	    GLint quads = nr/4;
 	    GLint i;
 	    ELTS_VARS( ALLOC_ELTS( quads*6 ) );

-	    for ( i = j-start ; i < j-start+quads*4 ; i+=4 ) {
+            for (i = j; i < j + quads * 4; i += 4) {
 	       EMIT_TWO_ELTS( 0, (i+0), (i+1) );
 	       EMIT_TWO_ELTS( 2, (i+3), (i+1) );
 	       EMIT_TWO_ELTS( 4, (i+2), (i+3) );
@@ -629,15 +623,15 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,

      INIT(GL_TRIANGLES);

-      for (j = start; j < count-3; j += 4) {
+      for (j = 0; j + 3 < count; j += 4) {
 	 void *tmp = ALLOC_VERTS( 6 );
 	 /* Send v0, v1, v3
 	  */
-	 tmp = EMIT_VERTS(ctx, j,     2, tmp);
-	 tmp = EMIT_VERTS(ctx, j + 3, 1, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
 	 /* Send v1, v2, v3
 	  */
-	 tmp = EMIT_VERTS(ctx, j + 1, 3, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
 	 (void) tmp;
      }
   }
@@ -698,9 +692,9 @@ static void TAG(render_points_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -728,7 +722,7 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,

      /* Emit whole number of lines in total and in each buffer:
       */
-      count -= (count-start) & 1;
+      count -= count & 1;
      currentsz -= currentsz & 1;
      dmasz -= dmasz & 1;

@@ -736,9 +730,9 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -768,9 +762,9 @@ static void TAG(render_line_strip_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j + 1 < count; j += nr - 1 ) {
+      for (j = 0; j + 1 < count; j += nr - 1) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -798,10 +792,7 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
      FLUSH();
      ELT_INIT( GL_LINE_STRIP );

-      if (flags & PRIM_BEGIN)
-	 j = start;
-      else
-	 j = start + 1;
+      j = (flags & PRIM_BEGIN) ? 0 : 1;

      currentsz = GET_CURRENT_VB_MAX_ELTS();
      if (currentsz < 8) {
@@ -818,23 +809,23 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );

 	    if (j + nr >= count &&
-		start < count - 1 && 
+		count > 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
 	       tmp = ALLOC_ELTS(nr+1);
-	       tmp = TAG(emit_elts)( ctx, elts+j, nr, tmp );
+               tmp = TAG(emit_elts)(ctx, elts + start + j, nr, tmp);
 	       tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
 	       (void) tmp;
 	    }
 	    else {
-	       TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+               TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	       currentsz = dmasz;
 	    }
 	 }

      }
-      else if (start + 1 < count && (flags & PRIM_END)) {
+      else if (count > 1 && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_ELTS(2);
 	 tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
@@ -874,14 +865,14 @@ static void TAG(render_triangles_elts)( struct gl_context *ctx,
   /* Emit whole number of tris in total.  dmasz is already a multiple
    * of 3.
    */
-   count -= (count-start)%3;
+   count -= count % 3;
   currentsz -= currentsz%3;
   if (currentsz < 8)
      currentsz = dmasz;

-   for (j = start; j < count; j += nr) {
+   for (j = 0; j < count; j += nr) {
      nr = MIN2( currentsz, count - j );
-      TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
      FLUSH();
      currentsz = dmasz;
   }
@@ -914,9 +905,9 @@ static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
      dmasz -= (dmasz & 1);
      currentsz -= (currentsz & 1);

-      for (j = start ; j + 2 < count; j += nr - 2 ) {
+      for (j = 0; j + 2 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+	 TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr) );
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -947,12 +938,12 @@ static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_ELTS( nr );
 	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
+         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
 	 (void) tmp;
 	 FLUSH();
 	 currentsz = dmasz;
@@ -985,12 +976,12 @@ static void TAG(render_poly_elts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1 ; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_ELTS( nr );
 	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
+         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
 	 (void) tmp;
 	 FLUSH();
 	 currentsz = dmasz;
@@ -1023,7 +1014,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
      /* Emit whole number of quads in total, and in each buffer.
       */
      dmasz -= dmasz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;
      currentsz -= currentsz & 1;

      if (currentsz < 12)
@@ -1035,7 +1026,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	 currentsz = currentsz/6*2;
 	 dmasz = dmasz/6*2;

-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+         for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );

 	    if (nr >= 4)
@@ -1044,7 +1035,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	       GLint quads = (nr/2)-1;
 	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );

-	       for ( i = j-start ; i < j-start+quads ; i++, elts += 2 ) {
+               for (i = j; i < j + quads; i++, elts += 2) {
 		  EMIT_TWO_ELTS( 0, elts[0], elts[1] );
 		  EMIT_TWO_ELTS( 2, elts[2], elts[1] );
 		  EMIT_TWO_ELTS( 4, elts[3], elts[2] );
@@ -1060,9 +1051,9 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
      else {
 	 ELT_INIT( GL_TRIANGLE_STRIP );

-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+         for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
-	    TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+            TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	    FLUSH();
 	    currentsz = dmasz;
 	 }
@@ -1076,6 +1067,9 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 				    GLuint count,
 				    GLuint flags )
 {
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
+
   if (HAVE_QUADS) {
      LOCAL_VARS;
      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
@@ -1088,14 +1082,12 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,

      currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;

-      count -= (count-start)%4;
-
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -1112,7 +1104,6 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
      /* Emit whole number of quads in total, and in each buffer.
       */
      dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
      currentsz -= currentsz & 3;

      /* Adjust for rendering as triangles:
@@ -1123,7 +1114,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );

 	 if (nr >= 4)
@@ -1132,7 +1123,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 	    GLint i;
 	    ELTS_VARS( ALLOC_ELTS( quads * 6 ) );

-	    for ( i = j-start ; i < j-start+quads ; i++, elts += 4 ) {
+	    for (i = j; i < j + quads; i++, elts += 4) {
 	       EMIT_TWO_ELTS( 0, elts[0], elts[1] );
 	       EMIT_TWO_ELTS( 2, elts[3], elts[1] );
 	       EMIT_TWO_ELTS( 4, elts[2], elts[3] );
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -52,5 +52,7 @@ BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = format_srgb.py SConscript

-format_srgb.c: $(srcdir)/format_srgb.py
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
+
+format_srgb.c: format_srgb.py
+	$(PYTHON_GEN) $(srcdir)/format_srgb.py > $@