docs: add sha256 checksums for 10.6.9

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
docs: add release notes for 10.6.9
2015-10-03 13:16:18 +01:00 · 2015-10-03 12:37:31 +01:00 · 2015-10-03 12:34:16 +01:00 · 2015-10-03 12:31:17 +01:00 · 2015-10-03 12:31:11 +01:00 · 2015-10-03 12:31:06 +01:00
79 changed files with 1293 additions and 370 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-10.6.5
+10.6.9
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,8 @@
+# The vec4 nir i965 work landed after the 10.6 branchpoint
+b8d2263c83d29f4626ac0fe0316978aa6262aefb i965/vec4_nir: Load constants as integers
+
+# The issue/commit it fixes has landed post 10.6 branchpoint
+afa1efdc8522d987e3af7c7a6272021caa33eb82 mesa: fix errors when reading depth with glReadPixels
+
+# The issue/commit is fixes has landed post 10.6 branchpoint
+7f8815bcb9af9b4b374ad7bd6e7cfa7529a6c980 i965: fix textureGrad for cubemaps
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*10\.6.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -1150,6 +1150,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
      [DEFINES="${DEFINES} -DGLX_USE_TLS"])

+dnl Read-only text section on x86 hardened platforms
+AC_ARG_ENABLE([glx-read-only-text],
+    [AS_HELP_STRING([--enable-glx-read-only-text],
+        [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+    [enable_glx_read_only_text="$enableval"],
+    [enable_glx_read_only_text=no])
+if test "x$enable_glx_read_only_text" = xyes; then
+    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+fi
+
 dnl
 dnl More DRI setup
 dnl
@@ -1515,6 +1525,10 @@ if test "x$enable_nine" = xyes; then
    if test "x$with_gallium_drivers" = xswrast; then
        AC_MSG_ERROR([nine requires at least one non-swrast gallium driver])
    fi
+    if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 6; then
+        AC_MSG_ERROR([gcc >= 4.6 is required to build nine])
+    fi
+
    if test "x$enable_dri3" = xno; then
        AC_MSG_WARN([using nine together with wine requires DRI3 enabled system])
    fi
--- a/docs/relnotes/10.6.5.html
+++ b/docs/relnotes/10.6.5.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+afe290fc7af75a25df5ee52396a9f09e5dba85fb3e159304bdda265b8564b0d4  mesa-10.6.5.tar.gz
+fb6fac3c85bcfa9d06b8dd439169f23f0c0924a88e44362e738b99b1feff762f  mesa-10.6.5.tar.xz
 </pre>


--- a/docs/relnotes/10.6.6.html
+++ b/docs/relnotes/10.6.6.html
@@ -0,0 +1,164 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+416517aa9df4791f97d34451a9e4da33c966afcd18c115c5769b92b15b018ef5  mesa-10.6.6.tar.gz
+570f2154b7340ff5db61ff103bc6e85165b8958798b78a50fa2df488e98e5778  mesa-10.6.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+  <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+  <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+  <li>mesa/texgetimage: fix missing stencil check</li>
+  <li>st/readpixels: fix accel path for skipimages.</li>
+  <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+  <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+  <li>mesa: enable texture stencil8 for multisample</li>
+  <li>r600/sb: update last_cf for finalize if.</li>
+  <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+  <li>st/nine: Require gcc &gt;= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.5</li>
+  <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+  <li>r600g: Fix assert in tgsi_cmp</li>
+  <li>r600g/sb: Handle undef in read port tracker</li>
+  <li>r600g/sb: Don't read junk after EOP</li>
+  <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+  <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+  <li>nv50,nvc0: disable depth bounds test on blit</li>
+  <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+  <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix copy propagation type changes.</li>
+  <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+  <li>mesa: create multisample fallback textures like normal textures</li>
+  <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+  <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+  <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>mesa: update fbo state in glTexStorage</li>
+  <li>glsl: build stageref mask using IR, not symbol table</li>
+  <li>glsl: expose build_program_resource_list function</li>
+  <li>glsl: create program resource list after LinkShader</li>
+  <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.7.html
+++ b/docs/relnotes/10.6.7.html
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.7 Release Notes / September 10, 2015</h1>
+
+<p>
+Mesa 10.6.7 is a bug fix release which fixes bugs found since the 10.6.6 release.
+</p>
+<p>
+Mesa 10.6.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4ba10c59abee30d72476543a57afd2f33803dabf4620dc333b335d47966ff842  mesa-10.6.7.tar.gz
+feb1f640b915dada88a7c793dfaff0ae23580f8903f87a6b76469253de0d28d8  mesa-10.6.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/teximage: use correct extension for accept stencil texture.</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.6</li>
+  <li>Revert "i965: Momentarily pretend to support ARB_texture_stencil8 for blits."</li>
+  <li>Update version to 10.6.7</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Handle attribute aliasing in attribute storage limit check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.8.html
+++ b/docs/relnotes/10.6.8.html
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.8 Release Notes / September 20, 2015</h1>
+
+<p>
+Mesa 10.6.8 is a bug fix release which fixes bugs found since the 10.6.7 release.
+</p>
+<p>
+Mesa 10.6.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1f34dba2a8059782e3e4e0f18b9628004e253b2c69085f735b846d2e63c9e250  mesa-10.6.8.tar.gz
+e36ee5ceeadb3966fb5ce5b4cf18322dbb76a4f075558ae49c3bba94f57d58fd  mesa-10.6.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (1):</p>
+<ul>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Antia Puentes (1):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.7</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+</ul>
+
+<p>Hans de Goede (4):</p>
+<ul>
+  <li>nv30: Fix creation of scanout buffers</li>
+  <li>nv30: Implement color resolve for msaa</li>
+  <li>nv30: Fix max width / height checks in nv30 sifm code</li>
+  <li>nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type</li>
+  <li>mesa: Don't allow wrong type setters for matrix uniforms</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: don't fall back to 16F when 32F is requested</li>
+  <li>nvc0: always emit a full shader colormask</li>
+  <li>nvc0: remove BGRA4 format support</li>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+  <li>gallivm: Workaround LLVM PR23628.</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: convert double to long long instead of unsigned long long</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>gallivm: Do not use NoFramePointerElim with LLVM 3.7.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.9.html
+++ b/docs/relnotes/10.6.9.html
@@ -0,0 +1,130 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.9 Release Notes / Octover 03, 2015</h1>
+
+<p>
+Mesa 10.6.9 is a bug fix release which fixes bugs found since the 10.6.8 release.
+</p>
+<p>
+Mesa 10.6.9 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+3406876aac67546d0c3e2cb97da330b62644c313e7992b95618662e13c54296a  mesa-10.6.9.tar.gz
+b04c4de6280b863babc2929573da17218d92e9e4ba6272d548d135415723e8c3  mesa-10.6.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
+</ul>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>i965: Remove early release of DRI2 miptree</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.8</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>Update version to 10.6.9</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Kyle Brenneman (3):</p>
+<ul>
+  <li>glx: Fix build errors with --enable-mangling (v2)</li>
+  <li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
+  <li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: fix vui time_scale zero error</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>mesa: fix mipmap generation for immutable, compressed textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
   else {
      double dscale = lp_const_scale(type);

-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
   }

   return elem;
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -277,7 +277,9 @@ disassemble(const void* func, llvm::raw_ostream & Out)
   options.StackAlignmentOverride = 4;
 #endif
 #if defined(DEBUG) || defined(PROFILE)
+#if HAVE_LLVM < 0x0307
   options.NoFramePointerElim = true;
+#endif
 #endif
   OwningPtr<TargetMachine> TM(T->createTargetMachine(Triple, sys::getHostCPUName(), "", options));

--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -50,6 +50,12 @@

 #include <stddef.h>

+// Workaround http://llvm.org/PR23628
+#if HAVE_LLVM >= 0x0307
+#  pragma push_macro("DEBUG")
+#  undef DEBUG
+#endif
+
 #include <llvm-c/Core.h>
 #include <llvm-c/ExecutionEngine.h>
 #include <llvm/Target/TargetOptions.h>
@@ -70,6 +76,11 @@
 #include <llvm/IR/Module.h>
 #include <llvm/Support/CBindingWrapping.h>

+// Workaround http://llvm.org/PR23628
+#if HAVE_LLVM >= 0x0307
+#  pragma pop_macro("DEBUG")
+#endif
+
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
@@ -439,7 +450,9 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 #if HAVE_LLVM < 0x0304
   options.NoFramePointerElimNonLeaf = true;
 #endif
+#if HAVE_LLVM < 0x0307
   options.NoFramePointerElim = true;
+#endif
 #endif

   builder.setEngineKind(EngineKind::JIT)
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -28,6 +28,7 @@
 #include "util/u_surface.h"

 #include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
 #include "nv30/nv30_screen.h"
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_resource.h"
@@ -144,21 +145,18 @@ nv30_resource_copy_region(struct pipe_context *pipe,
   nv30_transfer_rect(nv30, NEAREST, &src, &dst);
 }

-void
-nv30_resource_resolve(struct pipe_context *pipe,
-                      const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+                      const struct pipe_blit_info *info)
 {
-#if 0
-   struct nv30_context *nv30 = nv30_context(pipe);
   struct nv30_rect src, dst;

-   define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
-               info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
-   define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
-               info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
+   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+      info->src.box.y, info->src.box.width, info->src.box.height, &src);
+   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);

   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
 }

 void
@@ -172,7 +170,7 @@ nv30_blit(struct pipe_context *pipe,
       info.dst.resource->nr_samples <= 1 &&
       !util_format_is_depth_or_stencil(info.src.resource->format) &&
       !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("nv30: color resolve unimplemented\n");
+      nv30_resource_resolve(nv30, blit_info);
      return;
   }

@@ -362,6 +360,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
   blocksz = util_format_get_blocksize(pt->format);

   if ((pt->target == PIPE_TEXTURE_RECT) ||
+       (pt->bind & PIPE_BIND_SCANOUT) ||
       !util_is_power_of_two(pt->width0) ||
       !util_is_power_of_two(pt->height0) ||
       !util_is_power_of_two(pt->depth0) ||
@@ -369,6 +368,14 @@ nv30_miptree_create(struct pipe_screen *pscreen,
       util_format_is_float(pt->format) || mt->ms_mode) {
      mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
      mt->uniform_pitch = align(mt->uniform_pitch, 64);
+      if (pt->bind & PIPE_BIND_SCANOUT) {
+         struct nv30_screen *screen = nv30_screen(pscreen);
+         int pitch_align = MAX2(
+               screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+               /* round_down_pow2(mt->uniform_pitch / 4) */
+               1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+         mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+      }
   }

   if (!mt->uniform_pitch)
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
@@ -65,9 +65,6 @@ nv30_resource_copy_region(struct pipe_context *pipe,
                          struct pipe_resource *src, unsigned src_level,
                          const struct pipe_box *src_box);

-void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
 void
 nv30_blit(struct pipe_context *pipe,
          const struct pipe_blit_info *blit_info);
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -310,8 +310,9 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
                                unsigned sample_count,
                                unsigned bindings)
 {
-   if (sample_count > 4)
+   if (sample_count > nv30_screen(pscreen)->max_sample_count)
      return FALSE;
+
   if (!(0x00000017 & (1 << sample_count)))
      return FALSE;

@@ -441,6 +442,23 @@ nv30_screen_create(struct nouveau_device *dev)
      return NULL;
   }

+   /*
+    * Some modern apps try to use msaa without keeping in mind the
+    * restrictions on videomem of older cards. Resulting in dmesg saying:
+    * [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
+    * [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
+    * [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
+    *
+    * Because we are running out of video memory, after which the program
+    * using the msaa visual freezes, and eventually the entire system freezes.
+    *
+    * To work around this we do not allow msaa visauls by default and allow
+    * the user to override this via NV30_MAX_MSAA.
+    */
+   screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
+   if (screen->max_sample_count > 4)
+      screen->max_sample_count = 4;
+
   pscreen = &screen->base.base;
   pscreen->destroy = nv30_screen_destroy;
   pscreen->get_param = nv30_screen_get_param;
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.h
@@ -38,6 +38,8 @@ struct nv30_screen {
   /*XXX: nvfx state */
   struct nouveau_heap *vp_exec_heap;
   struct nouveau_heap *vp_data_heap;
+
+   unsigned max_sample_count;
 };

 static INLINE struct nv30_screen *
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -371,7 +371,7 @@ nv30_transfer_rect_blit(XFER_ARGS)
 static boolean
 nv30_transfer_sifm(XFER_ARGS)
 {
-   if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
+   if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
      return FALSE;

   if (src->d > 1 || dst->d > 1)
@@ -381,7 +381,7 @@ nv30_transfer_sifm(XFER_ARGS)
      return FALSE;

   if (!dst->pitch) {
-      if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
+      if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
         return FALSE;
   } else {
      if (dst->domain != NOUVEAU_BO_VRAM)
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -203,8 +203,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
   F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
   C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
   F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
+#if NOUVEAU_DRIVER != 0xc0
   C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
   F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
+#endif
   F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),

   C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -100,7 +100,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_MAX_TEXEL_OFFSET:
      return 7;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 330;
   case PIPE_CAP_MAX_RENDER_TARGETS:
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
      return NV50_SURFACE_FORMAT_R16_UNORM;
   case 4:
      return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+   case 8:
+      return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+   case 16:
+      return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
   default:
      return 0;
   }
@@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
   /* zsa state */
   BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
+   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -459,7 +459,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
-         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+         fp->hdr[18] |= 0xf << info->out[i].slot[0];
   }

   fp->fp.early_z = info->prop.fp.earlyFragTests;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -87,7 +87,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
      return 31;
   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-      return 65536;
+      return 128 * 1024 * 1024;
   case PIPE_CAP_GLSL_FEATURE_LEVEL:
      return 410;
   case PIPE_CAP_MAX_RENDER_TARGETS:
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)

   /* zsa state */
   IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+   IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
   IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
   IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);

--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 					fprintf(stderr, "CND:%X ", cf->cond);
 				if (cf->pop_count)
 					fprintf(stderr, "POP:%X ", cf->pop_count);
+				if (cf->end_of_program)
+					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			}
 		}
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6143,10 +6143,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
 		if (r)
 			return r;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2051,7 +2051,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
 			/* always privilege vs stage so that at worst we have the
 			 * pixel stage producing wrong output (not the vertex
 			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
 			new_num_vs_gprs = num_vs_gprs;
 			new_num_gs_gprs = num_gs_gprs;
 			new_num_es_gprs = num_es_gprs;
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
 	int r = 0;
 	uint32_t dw0 = dw[i];
 	uint32_t dw1 = dw[i+1];
+	assert(i+1 <= ndw);

 	if ((dw1 >> 29) & 1) { // CF_ALU
 		return decode_cf_alu(i, bc);
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
 		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
 		cf_node *if_pop = sh.create_cf(CF_OP_POP);

+		if (!last_cf || last_cf->get_parent_region() == r) {
+			last_cf = if_pop;
+		}
 		if_pop->bc.pop_count = 1;
 		if_pop->jump_after(if_pop);

--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
 		if ((r = decode_cf(i, eop)))
 			return r;

-	} while (!eop || (i >> 1) <= max_cf);
+	} while (!eop || (i >> 1) < max_cf);

 	return 0;
 }
@@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
 }

 int bc_parser::prepare_loop(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());

 	cf_node *end = cf_map[c->bc.addr - 1];
 	assert(end->bc.op == CF_OP_LOOP_END);
@@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
 }

 int bc_parser::prepare_if(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());
 	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

+	if (!end)
+		return 0; // not quite sure how this happens, malformed input?
+
 	BCP_DUMP(
 		sblog << "parsing JUMP @" << c->bc.id;
 		sblog << "\n";
@@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
 	if (c_else->parent != c->parent)
 		c_else = NULL;

-	if (end->parent != c->parent)
+	if (end && end->parent != c->parent)
 		end = NULL;

 	region_node *reg = sh->create_region();
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {

 	for (i = 0; i < nsrc; ++i) {
 		value *v = n->src[i];
-		if (v->is_readonly())
+		if (v->is_readonly() || v->is_undef())
 			continue;
 		if (i == 1 && opt)
 			continue;
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -184,7 +184,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -193,13 +193,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -207,7 +207,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
@@ -240,7 +240,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -250,7 +250,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += query->buffer.results_end + query->result_size/2;
@@ -259,7 +259,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -268,7 +268,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -251,6 +251,9 @@ static void vui(struct rvce_encoder *enc)
 {
 	int i;

+	if (!enc->pic.rate_ctrl.frame_rate_num)
+		return;
+
 	RVCE_BEGIN(0x04000009); // vui
 	RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
 	RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -116,6 +116,7 @@ struct si_shader_selector {
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	uint64_t	gs_used_inputs; /* mask of "get_unique_index" bits */
+	uint32_t	ps_colors_written;
 };

 union si_shader_key {
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -29,6 +29,7 @@
 #include "sid.h"
 #include "radeon/r600_cs.h"

+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
@@ -218,8 +219,10 @@ static unsigned si_pack_float_12p4(float x)
 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
 *   so COLOR1 is enabled pretty much all the time.
 * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
 */
-static void si_update_fb_blend_state(struct si_context *sctx)
+void si_update_fb_blend_state(struct si_context *sctx)
 {
 	struct si_pm4_state *pm4;
 	struct si_state_blend *blend = sctx->queued.named.blend;
@@ -237,6 +240,16 @@ static void si_update_fb_blend_state(struct si_context *sctx)
 			mask |= 0xf << (4*i);
 	mask &= blend->cb_target_mask;

+	/* Avoid a hang that happens when dual source blending is enabled
+	 * but there is not enough color outputs. This is undefined behavior,
+	 * so disable color writes completely.
+	 *
+	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
+	 */
+	if (blend->dual_src_blend &&
+	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+		mask = 0;
+
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 	si_pm4_set_state(sctx, fb_blend, pm4);
 }
@@ -328,6 +341,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		return NULL;

 	blend->alpha_to_one = state->alpha_to_one;
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);

 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,6 +39,7 @@ struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };

 struct si_state_sample_mask {
@@ -247,6 +248,7 @@ void si_shader_change_notify(struct si_context *sctx);
 /* si_state.c */
 struct si_shader_selector;

+void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                               enum pipe_format format,
                               enum pipe_texture_target target,
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -493,6 +493,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 					1llu << si_shader_io_get_unique_index(name, index);
 			}
 		}
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			if (name == TGSI_SEMANTIC_COLOR)
+				sel->ps_colors_written |= 1 << index;
+		}
+		break;
 	}

 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -574,6 +584,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	}

 	sctx->ps_shader = sel;
+	si_update_fb_blend_state(sctx);
 }

 static void si_delete_shader_selector(struct pipe_context *ctx,
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -706,14 +706,30 @@ gbm_dri_bo_import(struct gbm_device *gbm,
   {
      struct gbm_import_fd_data *fd_data = buffer;
      int stride = fd_data->stride, offset = 0;
+      int dri_format;
+
+      switch (fd_data->format) {
+      case GBM_BO_FORMAT_XRGB8888:
+         dri_format = GBM_FORMAT_XRGB8888;
+         break;
+      case GBM_BO_FORMAT_ARGB8888:
+         dri_format = GBM_FORMAT_ARGB8888;
+         break;
+      default:
+         dri_format = fd_data->format;
+      }

      image = dri->image->createImageFromFds(dri->screen,
                                             fd_data->width,
                                             fd_data->height,
-                                             fd_data->format,
+                                             dri_format,
                                             &fd_data->fd, 1,
                                             &stride, &offset,
                                             NULL);
+      if (image == NULL) {
+         errno = EINVAL;
+         return NULL;
+      }
      gbm_format = fd_data->format;
      break;
   }
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1960,6 +1960,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    */
   unsigned used_locations = (max_index >= 32)
      ? ~0 : ~((1 << max_index) - 1);
+   unsigned double_storage_locations = 0;

   assert((target_index == MESA_SHADER_VERTEX)
 	  || (target_index == MESA_SHADER_FRAGMENT));
@@ -2054,34 +2055,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,

      const unsigned slots = var->type->count_attribute_slots();

-      /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
-       *
-       * "A program with more than the value of MAX_VERTEX_ATTRIBS active
-       * attribute variables may fail to link, unless device-dependent
-       * optimizations are able to make the program fit within available
-       * hardware resources. For the purposes of this test, attribute variables
-       * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
-       * and dmat4 may count as consuming twice as many attributes as equivalent
-       * single-precision types. While these types use the same number of
-       * generic attributes as their single-precision equivalents,
-       * implementations are permitted to consume two single-precision vectors
-       * of internal storage for each three- or four-component double-precision
-       * vector."
-       * Until someone has a good reason in Mesa, enforce that now.
-       */
-      if (target_index == MESA_SHADER_VERTEX) {
-	 total_attribs_size += slots;
-	 if (var->type->without_array() == glsl_type::dvec3_type ||
-	     var->type->without_array() == glsl_type::dvec4_type ||
-	     var->type->without_array() == glsl_type::dmat2x3_type ||
-	     var->type->without_array() == glsl_type::dmat2x4_type ||
-	     var->type->without_array() == glsl_type::dmat3_type ||
-	     var->type->without_array() == glsl_type::dmat3x4_type ||
-	     var->type->without_array() == glsl_type::dmat4x3_type ||
-	     var->type->without_array() == glsl_type::dmat4_type)
-	    total_attribs_size += slots;
-      }
-
      /* If the variable is not a built-in and has a location statically
       * assigned in the shader (presumably via a layout qualifier), make sure
       * that it doesn't collide with other assigned locations.  Otherwise,
@@ -2196,6 +2169,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	    }

 	    used_locations |= (use_mask << attr);
+
+            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+             *
+             * "A program with more than the value of MAX_VERTEX_ATTRIBS
+             *  active attribute variables may fail to link, unless
+             *  device-dependent optimizations are able to make the program
+             *  fit within available hardware resources. For the purposes
+             *  of this test, attribute variables of the type dvec3, dvec4,
+             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+             *  count as consuming twice as many attributes as equivalent
+             *  single-precision types. While these types use the same number
+             *  of generic attributes as their single-precision equivalents,
+             *  implementations are permitted to consume two single-precision
+             *  vectors of internal storage for each three- or four-component
+             *  double-precision vector."
+             *
+             * Mark this attribute slot as taking up twice as much space
+             * so we can count it properly against limits.  According to
+             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+             * is optional behavior, but it seems preferable.
+             */
+            const glsl_type *type = var->type->without_array();
+            if (type == glsl_type::dvec3_type ||
+                type == glsl_type::dvec4_type ||
+                type == glsl_type::dmat2x3_type ||
+                type == glsl_type::dmat2x4_type ||
+                type == glsl_type::dmat3_type ||
+                type == glsl_type::dmat3x4_type ||
+                type == glsl_type::dmat4x3_type ||
+                type == glsl_type::dmat4_type) {
+               double_storage_locations |= (use_mask << attr);
+            }
 	 }

 	 continue;
@@ -2207,6 +2212,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
   }

   if (target_index == MESA_SHADER_VERTEX) {
+      unsigned total_attribs_size =
+         _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+         _mesa_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
 	 linker_error(prog,
 		      "attempt to use %d vertex attribute slots only %d available ",
@@ -2617,9 +2625,17 @@ build_stageref(struct gl_shader_program *shProg, const char *name)
      struct gl_shader *sh = shProg->_LinkedShaders[i];
      if (!sh)
         continue;
-      ir_variable *var = sh->symbols->get_variable(name);
-      if (var)
-         stages |= (1 << i);
+
+      /* Shader symbol table may contain variables that have
+       * been optimized away. Search IR for the variable instead.
+       */
+      foreach_in_list(ir_instruction, node, sh->ir) {
+         ir_variable *var = node->as_variable();
+         if (var && strcmp(var->name, name) == 0) {
+            stages |= (1 << i);
+            break;
+         }
+      }
   }
   return stages;
 }
@@ -2674,7 +2690,7 @@ add_interface_variables(struct gl_shader_program *shProg,
 * Builds up a list of program resources that point to existing
 * resource data.
 */
-static void
+void
 build_program_resource_list(struct gl_context *ctx,
                            struct gl_shader_program *shProg)
 {
@@ -3227,10 +3243,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
      }
   }

-   build_program_resource_list(ctx, prog);
-   if (!prog->LinkStatus)
-      goto done;
-
   /* FINISHME: Assign fragment shader output locations. */

 done:
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -39,6 +39,10 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
 extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);

+extern void
+build_program_resource_list(struct gl_context *ctx,
+                            struct gl_shader_program *shProg);
+
 extern void
 linker_error(struct gl_shader_program *prog, const char *fmt, ...)
   PRINTFLIKE(2, 3);
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -46,6 +46,7 @@ AM_CFLAGS = \
 	$(EXTRA_DEFINES_XF86VIDMODE) \
 	-D_REENTRANT \
 	-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
+	-DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \
 	$(DEFINES) \
 	$(LIBDRM_CFLAGS) \
 	$(DRI2PROTO_CFLAGS) \
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -73,6 +73,10 @@ dri_message(int level, const char *f, ...)
   }
 }

+#ifndef GL_LIB_NAME
+#define GL_LIB_NAME "libGL.so.1"
+#endif
+
 #ifndef DEFAULT_DRIVER_DIR
 /* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
 #define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
@@ -99,7 +103,7 @@ driOpenDriver(const char *driverName)
   int len;

   /* Attempt to make sure libGL symbols will be visible to the driver */
-   glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL);
+   glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL);

   libPaths = NULL;
   if (geteuid() == getuid()) {
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -2646,7 +2646,11 @@ _X_EXPORT void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
 */
 _X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void)
 #if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
+# if defined(USE_MGL_NAMESPACE)
+   __attribute__ ((alias("mglXGetProcAddressARB")));
+# else
   __attribute__ ((alias("glXGetProcAddressARB")));
+# endif
 #else
 {
   return glXGetProcAddressARB(procName);
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -280,11 +280,17 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char *name);
 # define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
 #else
 # if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
-#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
+ * extra expansion means that the name mangling macros in glx_mangle.h will
+ * apply before stringification, so the alias attribute will have a string like
+ * "mglXFoo" instead of "glXFoo". */
+#  define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
   return_type  real_func  proto_args                                   \
   __attribute__ ((alias( # aliased_func ) ));
+#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+   GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
 #  define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
-   GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
+   GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
 # else
 #  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
   return_type  real_func  proto_args                                   \
--- a/src/mapi/mapi_glapi.c
+++ b/src/mapi/mapi_glapi.c
@@ -175,7 +175,7 @@ _glapi_get_stub(const char *name, int generate)
   const struct mapi_stub *stub;

 #ifdef USE_MGL_NAMESPACE
-   if (name)
+   if (name && name[0] == 'm')
      name++;
 #endif

--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -251,7 +251,7 @@ intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
         continue;

      intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
-                                                     start + length, prim);
+                                                     length, prim);
   }

   tnl->Driver.Render.Finish(ctx);
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -428,11 +428,7 @@ brw_initialize_context_constants(struct brw_context *brw)

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
-   if (brw->gen >= 9 || brw->is_cherryview) {
-      ctx->Const.MaxLineWidth = 40.0;
-      ctx->Const.MaxLineWidthAA = 40.0;
-      ctx->Const.LineWidthGranularity = 0.125;
-   } else if (brw->gen >= 6) {
+   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
@@ -1417,7 +1413,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
              buffer->cpp, buffer->pitch);
   }

-   intel_miptree_release(&rb->mt);
   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -787,21 +787,6 @@ static void brw_emit_vertices(struct brw_context *brw)
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

-   if (brw->gen >= 6 && gen6_edgeflag_input) {
-      uint32_t format =
-         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
-
-      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
-                GEN6_VE0_VALID |
-                GEN6_VE0_EDGE_FLAG_ENABLE |
-                (format << BRW_VE0_FORMAT_SHIFT) |
-                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
-
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
@@ -842,6 +827,21 @@ static void brw_emit_vertices(struct brw_context *brw)
      OUT_BATCH(dw1);
   }

+   if (brw->gen >= 6 && gen6_edgeflag_input) {
+      uint32_t format =
+         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+
+      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
+                GEN6_VE0_VALID |
+                GEN6_VE0_EDGE_FLAG_ENABLE |
+                (format << BRW_VE0_FORMAT_SHIFT) |
+                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
+
   ADVANCE_BATCH();
 }

--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1942,7 +1942,10 @@ fs_visitor::assign_vs_urb_setup()

            inst->src[i].file = HW_REG;
            inst->src[i].fixed_hw_reg =
-               retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+               stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                  inst->src[i].subreg_offset),
+                      inst->exec_size * inst->src[i].stride,
+                      inst->exec_size, inst->src[i].stride);
         }
      }
   }
@@ -3607,7 +3610,8 @@ fs_visitor::lower_integer_multiplication()
          * schedule multi-component multiplications much better.
          */

-         if (inst->conditional_mod && inst->dst.is_null()) {
+         fs_reg orig_dst = inst->dst;
+         if (orig_dst.is_null() || orig_dst.file == MRF) {
            inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                               inst->dst.type, dispatch_width);
         }
@@ -3673,9 +3677,8 @@ fs_visitor::lower_integer_multiplication()

         insert(ADD(dst, low, high));

-         if (inst->conditional_mod) {
-            fs_reg null(retype(brw_null_reg(), inst->dst.type));
-            fs_inst *mov = MOV(null, inst->dst);
+         if (inst->conditional_mod || orig_dst.file == MRF) {
+            fs_inst *mov = MOV(orig_dst, inst->dst);
            mov->conditional_mod = inst->conditional_mod;
            insert(mov);
         }
@@ -4098,12 +4101,12 @@ fs_visitor::calculate_register_pressure()
 void
 fs_visitor::optimize()
 {
-   split_virtual_grfs();
-
   move_uniform_array_access_to_pull_constants();
   assign_constant_locations();
   demote_pull_constants();

+   split_virtual_grfs();
+
 #define OPT(pass, args...) ({                                           \
      pass_num++;                                                       \
      bool this_progress = pass(args);                                  \
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,7 @@ static bool
 can_change_source_types(fs_inst *inst)
 {
   return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
          (inst->opcode == BRW_OPCODE_MOV ||
           (inst->opcode == BRW_OPCODE_SEL &&
            inst->predicate != BRW_PREDICATE_NONE &&
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -414,6 +414,12 @@ brw_meta_stencil_blit(struct brw_context *brw,
   GLenum target;

   _mesa_meta_fb_tex_blit_begin(ctx, &blit);
+   /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
+    * returns a valid format.  When we properly support the extension, we
+    * should remove this.
+    */
+   assert(ctx->Extensions.ARB_texture_stencil8 == false);
+   ctx->Extensions.ARB_texture_stencil8 = true;

   _mesa_GenFramebuffers(1, &fbo);
   /* Force the surface to be configured for level zero. */
@@ -451,6 +457,7 @@ brw_meta_stencil_blit(struct brw_context *brw,
   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);

 error:
+   ctx->Extensions.ARB_texture_stencil8 = false;
   _mesa_meta_fb_tex_blit_end(ctx, target, &blit);
   _mesa_meta_end(ctx);

--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -879,7 +879,8 @@ brw_upload_invariant_state(struct brw_context *brw)
 {
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;

-   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw->last_pipeline = BRW_RENDER_PIPELINE;

   if (brw->gen < 6) {
      /* Disable depth offset clamping. */
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -60,6 +60,8 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type)
      this->swizzle = brw_swizzle_for_size(type->vector_elements);
   else
      this->swizzle = BRW_SWIZZLE_XYZW;
+   if (type)
+      this->type = brw_type_for_base_type(type);
 }

 /** Generic unset register constructor. */
@@ -938,6 +940,14 @@ vec4_instruction::can_reswizzle(int dst_writemask,
   if (mlen > 0)
      return false;

+   /* We can't use swizzles on the accumulator and that's really the only
+    * HW_REG we would care to reswizzle so just disallow them all.
+    */
+   for (int i = 0; i < 3; i++) {
+      if (src[i].file == HW_REG)
+         return false;
+   }
+
   return true;
 }

@@ -1041,6 +1051,17 @@ vec4_visitor::opt_register_coalesce()
               }
            }

+            /* This doesn't handle saturation on the instruction we
+             * want to coalesce away if the register types do not match.
+             * But if scan_inst is a non type-converting 'mov', we can fix
+             * the types later.
+             */
+            if (inst->saturate &&
+                inst->dst.type != scan_inst->dst.type &&
+                !(scan_inst->opcode == BRW_OPCODE_MOV &&
+                  scan_inst->dst.type == scan_inst->src[0].type))
+               break;
+
            /* If we can't handle the swizzle, bail. */
            if (!scan_inst->can_reswizzle(inst->dst.writemask,
                                          inst->src[0].swizzle,
@@ -1116,6 +1137,16 @@ vec4_visitor::opt_register_coalesce()
 	       scan_inst->dst.file = inst->dst.file;
 	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->dst.reg_offset = inst->dst.reg_offset;
+               if (inst->saturate &&
+                   inst->dst.type != scan_inst->dst.type) {
+                  /* If we have reached this point, scan_inst is a non
+                   * type-converting 'mov' and we can modify its register types
+                   * to match the ones in inst. Otherwise, we could have an
+                   * incorrect saturation result.
+                   */
+                  scan_inst->dst.type = inst->dst.type;
+                  scan_inst->src[0].type = inst->src[0].type;
+               }
 	       scan_inst->saturate |= inst->saturate;
 	    }
 	    scan_inst = (vec4_instruction *)scan_inst->next;
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw)
 {
   struct gl_context *ctx = &brw->ctx;
   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+   bool uses_edge_flag;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

+   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+                     ctx->Polygon.BackMode != GL_FILL);
+
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

-      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
-                "Using VID/IID with edgeflags, need to reorder the "
-                "vertex attributes");
+      /* The element for the edge flags must always be last, so we have to
+       * insert the SGVS before it in that case.
+       */
+      if (uses_edge_flag) {
+         assert(vue > 0);
+         vue--;
+      }
+
      WARN_ONCE(vue >= 33,
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");
@@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw)
      ADVANCE_BATCH();
   }

-   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   /* Normally we don't need an element for the SGVS attribute because the
+    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
+    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
+    * vertex ID is used then it needs an element for the base vertex buffer.
+    * Additionally if there is an edge flag element then the SGVS can't be
+    * inserted past that so we need a dummy element to ensure that the edge
+    * flag is the last one.
+    */
+   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
+                              (brw->vs.prog_data->uses_instanceid &&
+                               uses_edge_flag));
+   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
@@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw)
                (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
   }

+   if (needs_sgvs_element) {
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(GEN6_VE0_VALID |
+                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      } else {
+         OUT_BATCH(GEN6_VE0_VALID);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      }
+   }
+
   if (gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@@ -206,25 +244,26 @@ gen8_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }
-
-   if (brw->vs.prog_data->uses_vertexid) {
-      OUT_BATCH(GEN6_VE0_VALID |
-                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
   ADVANCE_BATCH();

-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+      unsigned element_index;
+
+      /* The edge flag element is reordered to be the last one in the code
+       * above so we need to compensate for that in the element indices used
+       * below.
+       */
+      if (input == gen6_edgeflag_input)
+         element_index = nr_elements - 1;
+      else
+         element_index = j++;

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
+      OUT_BATCH(element_index |
+                (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
      OUT_BATCH(buffer->step_rate);
      ADVANCE_BATCH();
   }
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -569,43 +569,41 @@ intel_emit_linear_blit(struct brw_context *brw,
   int16_t src_x, dst_x;
   bool ok;

-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-			  src_x, 0, /* src x/y */
-			  dst_x, 0, /* dst x/y */
-			  pitch, height, /* w, h */
-			  GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and
+       * we want width to match pitch. Max width is (1 << 15 - 1),
+       * rounding that down to the nearest DWORD is 1 << 15 - 4
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;

-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);

-   if (size != 0) {
      ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-			     src_x, 0, /* src x/y */
-			     dst_x, 0, /* dst x/y */
-			     size, 1, /* w, h */
-			     GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }

 /**
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -446,7 +446,7 @@ static GLboolean radeon_run_render( struct gl_context *ctx,
 		 start, start+length);

      if (length)
-	 tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
+         tab[prim & PRIM_MODE_MASK](ctx, start, length, prim);
   }

   tnl->Driver.Render.Finish( ctx );
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -3422,7 +3422,16 @@ _mesa_get_framebuffer_attachment_parameter(struct gl_context *ctx,

   switch (pname) {
   case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT:
-      *params = _mesa_is_winsys_fbo(buffer)
+      /* From the OpenGL spec, 9.2. Binding and Managing Framebuffer Objects:
+       *
+       * "If the value of FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE is NONE, then
+       *  either no framebuffer is bound to target; or the default framebuffer
+       *  is bound, attachment is DEPTH or STENCIL, and the number of depth or
+       *  stencil bits, respectively, is zero."
+       */
+      *params = (_mesa_is_winsys_fbo(buffer) &&
+                 ((attachment != GL_DEPTH && attachment != GL_STENCIL) ||
+                  (att->Type != GL_NONE)))
         ? GL_FRAMEBUFFER_DEFAULT : att->Type;
      return;
   case GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT:
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -35,6 +35,7 @@
 #include "mtypes.h"
 #include "state.h"
 #include "texcompress.h"
+#include "texstate.h"
 #include "framebuffer.h"
 #include "samplerobj.h"
 #include "stencil.h"
@@ -1709,6 +1710,52 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params)
   }
 }

+/**
+ * Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D
+ * into the corresponding Mesa texture target index.
+ * \return TEXTURE_x_INDEX or -1 if binding is invalid
+ */
+static int
+tex_binding_to_index(const struct gl_context *ctx, GLenum binding)
+{
+   switch (binding) {
+   case GL_TEXTURE_BINDING_1D:
+      return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D:
+      return TEXTURE_2D_INDEX;
+   case GL_TEXTURE_BINDING_3D:
+      return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+      return ctx->Extensions.ARB_texture_cube_map
+         ? TEXTURE_CUBE_INDEX : -1;
+   case GL_TEXTURE_BINDING_RECTANGLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle
+         ? TEXTURE_RECT_INDEX : -1;
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array
+         ? TEXTURE_1D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+      return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array)
+         || _mesa_is_gles3(ctx)
+         ? TEXTURE_2D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_BUFFER:
+      return ctx->API == API_OPENGL_CORE &&
+             ctx->Extensions.ARB_texture_buffer_object ?
+             TEXTURE_BUFFER_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
+         ? TEXTURE_CUBE_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1;
+   default:
+      return -1;
+   }
+}
+
 static enum value_type
 find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
 {
@@ -1972,6 +2019,45 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
      v->value_int = ctx->ImageUnits[index].Format;
      return TYPE_INT;

+   /* ARB_direct_state_access */
+   case GL_TEXTURE_BINDING_1D:
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+   case GL_TEXTURE_BINDING_2D:
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_BINDING_3D:
+   case GL_TEXTURE_BINDING_BUFFER:
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_BINDING_RECTANGLE: {
+      int target;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      target = tex_binding_to_index(ctx, pname);
+      if (target < 0)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name;
+      return TYPE_INT;
+   }
+
+   case GL_SAMPLER_BINDING: {
+      struct gl_sampler_object *samp;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      samp = ctx->Texture.Unit[index].Sampler;
+      v->value_int = samp ? samp->Name : 0;
+      return TYPE_INT;
+   }
+
   case GL_MAX_COMPUTE_WORK_GROUP_COUNT:
      if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader)
         goto invalid_enum;
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2292,8 +2292,18 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         break;

      case GL_HALF_FLOAT:
-         if (internalFormat != GL_RG16F)
-            return GL_INVALID_OPERATION;
+      case GL_HALF_FLOAT_OES:
+         switch (internalFormat) {
+            case GL_RG16F:
+               break;
+            case GL_RG:
+               if (ctx->Extensions.ARB_texture_rg &&
+                   ctx->Extensions.OES_texture_half_float)
+                  break;
+            /* fallthrough */
+            default:
+               return GL_INVALID_OPERATION;
+         }
         break;

      case GL_FLOAT:
@@ -2301,6 +2311,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         case GL_RG16F:
         case GL_RG32F:
            break;
+         case GL_RG:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_float)
+               break;
+            /* fallthrough */
         default:
            return GL_INVALID_OPERATION;
         }
@@ -2361,8 +2376,19 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         break;

      case GL_HALF_FLOAT:
-         if (internalFormat != GL_R16F)
+      case GL_HALF_FLOAT_OES:
+         switch (internalFormat) {
+         case GL_R16F:
+            break;
+         case GL_RG:
+         case GL_RED:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_half_float)
+               break;
+            /* fallthrough */
+         default:
            return GL_INVALID_OPERATION;
+         }
         break;

      case GL_FLOAT:
@@ -2370,6 +2396,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         case GL_R16F:
         case GL_R32F:
            break;
+         case GL_RED:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_float)
+               break;
+            /* fallthrough */
         default:
            return GL_INVALID_OPERATION;
         }
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1922,11 +1922,8 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target,
      }

      /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         return;
-      }
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);

      if (target == GL_TEXTURE_1D_ARRAY) {
 	 srcDepth = srcHeight;
@@ -2107,7 +2104,19 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                         srcWidth, srcHeight, srcDepth,
                                         &dstWidth, &dstHeight, &dstDepth);
      if (!nextLevel)
-	 break;
+	 goto end;
+
+      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
+                                      dstWidth, dstHeight, dstDepth,
+                                      border, srcImage->InternalFormat,
+                                      srcImage->TexFormat)) {
+         /* all done */
+         goto end;
+      }
+
+      /* get dest gl_texture_image */
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);

      /* Compute dst image strides and alloc memory on first iteration */
      temp_dst_row_stride = _mesa_format_row_stride(temp_format, dstWidth);
@@ -2121,13 +2130,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
 	 }
      }

-      /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         goto end;
-      }
-
      /* for 2D arrays, setup array[depth] of slice pointers */
      for (i = 0; i < srcDepth; i++) {
         temp_src_slices[i] = temp_src + temp_src_img_stride * i;
@@ -2146,14 +2148,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                  dstWidth, dstHeight, dstDepth,
                                  temp_dst_slices, temp_dst_row_stride);

-      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
-                                      dstWidth, dstHeight, dstDepth,
-                                      border, srcImage->InternalFormat,
-                                      srcImage->TexFormat)) {
-         /* all done */
-         goto end;
-      }
-
      /* The image space was allocated above so use glTexSubImage now */
      ctx->Driver.TexSubImage(ctx, 2, dstImage,
                              0, 0, 0, dstWidth, dstHeight, dstDepth,
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -537,7 +537,8 @@ read_rgba_pixels( struct gl_context *ctx,
       * convert to, then we can convert directly into the dst buffer and avoid
       * the final conversion/copy from the rgba buffer to the dst buffer.
       */
-      if (dst_format == rgba_format) {
+      if (dst_format == rgba_format &&
+          dst_stride == rgba_stride) {
         need_convert = false;
         rgba = dst;
      } else {
--- a/src/mesa/main/texcompress_bptc.c
+++ b/src/mesa/main/texcompress_bptc.c
@@ -1291,7 +1291,8 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -65,7 +65,7 @@ _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGB ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGB/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -130,7 +130,8 @@ _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
--- a/src/mesa/main/texcompress_rgtc.c
+++ b/src/mesa/main/texcompress_rgtc.c
@@ -196,9 +196,11 @@ _mesa_texstore_rg_rgtc2(TEXSTORE_PARAMS)
          dstFormat == MESA_FORMAT_LA_LATC2_UNORM);

   if (baseInternalFormat == GL_RG)
-      tempFormat = MESA_FORMAT_R8G8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_R8G8_UNORM
+                                         : MESA_FORMAT_G8R8_UNORM;
   else
-      tempFormat = MESA_FORMAT_L8A8_UNORM;
+      tempFormat = _mesa_little_endian() ? MESA_FORMAT_L8A8_UNORM
+                                         : MESA_FORMAT_A8L8_UNORM;

   rgRowStride = 2 * srcWidth * sizeof(GLubyte);
   tempImage = malloc(srcWidth * srcHeight * 2 * sizeof(GLubyte));
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -130,7 +130,7 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGB ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGB/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -187,7 +187,7 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -198,7 +198,8 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
@@ -244,7 +245,7 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -255,7 +256,8 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
@@ -300,7 +302,7 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -311,7 +313,8 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
      tempImageSlices[0] = (GLubyte *) tempImage;
      _mesa_texstore(ctx, dims,
                     baseInternalFormat,
-                     MESA_FORMAT_R8G8B8A8_UNORM,
+                     _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
+                                           : MESA_FORMAT_A8B8G8R8_UNORM,
                     rgbaRowStride, tempImageSlices,
                     srcWidth, srcHeight, srcDepth,
                     srcFormat, srcType, srcAddr,
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -915,6 +915,13 @@ getteximage_error_check(struct gl_context *ctx,
                  "glGetTex%sImage(format=GL_STENCIL_INDEX)", suffix);
      return GL_TRUE;
   }
+   else if (_mesa_is_stencil_format(format)
+	    && !_mesa_is_depthstencil_format(baseFormat)
+	    && !_mesa_is_stencil_format(baseFormat)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
+      return true;
+   }
   else if (_mesa_is_ycbcr_format(format)
            && !_mesa_is_ycbcr_format(baseFormat)) {
      _mesa_error(ctx, GL_INVALID_OPERATION,
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -222,7 +222,7 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
      }
   }

-   if (ctx->Extensions.ARB_stencil_texturing) {
+   if (ctx->Extensions.ARB_texture_stencil8) {
      switch (internalFormat) {
      case GL_STENCIL_INDEX:
      case GL_STENCIL_INDEX1:
@@ -3682,12 +3682,12 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
      rowStride = _mesa_image_image_stride(&ctx->Unpack, width, height,
                                           format, type);
      /* Copy in each face. */
-      for (i = 0; i < 6; ++i) {
+      for (i = zoffset; i < zoffset + depth; ++i) {
         texImage = texObj->Image[i][level];
         assert(texImage);

         _mesa_texture_sub_image(ctx, 3, texObj, texImage, texObj->Target,
-                                 level, xoffset, yoffset, zoffset,
+                                 level, xoffset, yoffset, 0,
                                 width, height, 1, format,
                                 type, pixels, true);
         pixels = (GLubyte *) pixels + rowStride;
@@ -5537,10 +5537,13 @@ static GLboolean
 is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
 {
   /* Everything that is allowed for renderbuffers,
-    * except for a base format of GL_STENCIL_INDEX.
+    * except for a base format of GL_STENCIL_INDEX, unless supported.
    */
   GLenum baseFormat = _mesa_base_fbo_format(ctx, internalformat);
-   return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
+   if (ctx->Extensions.ARB_texture_stencil8)
+      return baseFormat != 0;
+   else
+      return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
 }


--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1888,6 +1888,12 @@ get_tex_parameterfv(struct gl_context *ctx,
         *params = (GLfloat) obj->Sampler.sRGBDecode;
         break;

+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = ENUM_TO_FLOAT(obj->Target);
+         break;
+
      default:
         goto invalid_pname;
   }
@@ -2113,6 +2119,12 @@ get_tex_parameteriv(struct gl_context *ctx,
         *params = obj->ImageFormatCompatibilityType;
         break;

+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = (GLint) obj->Target;
+         break;
+
      default:
         goto invalid_pname;
   }
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -189,6 +189,20 @@ clear_texture_fields(struct gl_context *ctx,
 }


+/**
+ * Update/re-validate framebuffer object.
+ */
+static void
+update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
+{
+   const unsigned numFaces = _mesa_num_tex_faces(texObj->Target);
+   for (int level = 0; level < ARRAY_SIZE(texObj->Image[0]); level++) {
+      for (unsigned face = 0; face < numFaces; face++)
+         _mesa_update_fbo_texture(ctx, texObj, face, level);
+   }
+}
+
+
 GLboolean
 _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
 {
@@ -445,6 +459,7 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims,

      _mesa_set_texture_view_state(ctx, texObj, target, levels);

+      update_fbo_texture(ctx, texObj);
   }
 }

--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -1004,6 +1004,7 @@ store_texsubimage(struct gl_context *ctx,
   /* compute slice info (and do some sanity checks) */
   switch (target) {
   case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_EXTERNAL_OES:
@@ -1025,6 +1026,7 @@ store_texsubimage(struct gl_context *ctx,
      srcImageStride = _mesa_image_row_stride(packing, width, format, type);
      break;
   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      numSlices = depth;
      sliceOffset = zoffset;
      depth = 1;
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -312,24 +312,31 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,

      return;
   }
+   if ((uni->type->base_type == GLSL_TYPE_DOUBLE &&
+        returnType != GLSL_TYPE_DOUBLE) ||
+       (uni->type->base_type != GLSL_TYPE_DOUBLE &&
+        returnType == GLSL_TYPE_DOUBLE)) {
+	 _mesa_error( ctx, GL_INVALID_OPERATION,
+	             "glGetnUniform*vARB(incompatible uniform types)");
+	return;
+   }

   {
      unsigned elements = (uni->type->is_sampler())
 	 ? 1 : uni->type->components();
+      const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;

      /* Calculate the source base address *BEFORE* modifying elements to
       * account for the size of the user's buffer.
       */
      const union gl_constant_value *const src =
-	 &uni->storage[offset * elements];
+	 &uni->storage[offset * elements * dmul];

      assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT ||
-             returnType == GLSL_TYPE_UINT);
-      /* The three (currently) supported types all have the same size,
-       * which is of course the same as their union. That'll change
-       * with glGetUniformdv()...
-       */
-      unsigned bytes = sizeof(src[0]) * elements;
+             returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
+
+      /* doubles have a different size than the other 3 types */
+      unsigned bytes = sizeof(src[0]) * elements * dmul;
      if (bufSize < 0 || bytes > (unsigned) bufSize) {
 	 _mesa_error( ctx, GL_INVALID_OPERATION,
 	             "glGetnUniform*vARB(out of bounds: bufSize is %d,"
@@ -857,7 +864,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                     GLint location, GLsizei count,
                     GLboolean transpose,
-                     const GLvoid *values, GLenum type)
+                     const GLvoid *values, enum glsl_base_type basicType)
 {
   unsigned offset;
   unsigned vectors;
@@ -876,8 +883,8 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
      return;
   }

-   assert(type == GL_FLOAT || type == GL_DOUBLE);
-   size_mul = type == GL_DOUBLE ? 2 : 1;
+   assert(basicType == GLSL_TYPE_FLOAT || basicType == GLSL_TYPE_DOUBLE);
+   size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;

   assert(!uni->type->is_sampler());
   vectors = uni->type->matrix_columns;
@@ -903,6 +910,31 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
      }
   }

+   /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec
+    * says:
+    *
+    *     "If any of the following conditions occur, an INVALID_OPERATION
+    *     error is generated by the Uniform* commands, and no uniform values
+    *     are changed:
+    *
+    *     ...
+    *
+    *     - if the uniform declared in the shader is not of type boolean and
+    *       the type indicated in the name of the Uniform* command used does
+    *       not match the type of the uniform"
+    *
+    * There are no Boolean matrix types, so we do not need to allow
+    * GLSL_TYPE_BOOL here (as _mesa_uniform does).
+    */
+   if (uni->type->base_type != basicType) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
+                  cols, rows, uni->name, location,
+                  glsl_type_name(uni->type->base_type),
+                  glsl_type_name(basicType));
+      return;
+   }
+
   if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
      log_uniform(values, uni->type->base_type, components, vectors, count,
 		  bool(transpose), shProg, location, uni);
@@ -932,7 +964,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
   if (!transpose) {
      memcpy(&uni->storage[elements * offset], values,
 	     sizeof(uni->storage[0]) * elements * count * size_mul);
-   } else if (type == GL_FLOAT) {
+   } else if (basicType == GLSL_TYPE_FLOAT) {
      /* Copy and transpose the matrix.
       */
      const float *src = (const float *)values;
@@ -949,7 +981,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 	 src += elements;
      }
   } else {
-      assert(type == GL_DOUBLE);
+      assert(basicType == GLSL_TYPE_DOUBLE);
      const double *src = (const double *)values;
      double *dst = (double *)&uni->storage[elements * offset].f;

--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -553,7 +553,7 @@ _mesa_UniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_FLOAT);
+			2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -562,7 +562,7 @@ _mesa_UniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_FLOAT);
+			3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -571,7 +571,7 @@ _mesa_UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_FLOAT);
+			4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 /** Same as above with direct state access **/
@@ -683,7 +683,7 @@ _mesa_ProgramUniformMatrix2fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -694,7 +694,7 @@ _mesa_ProgramUniformMatrix3fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -705,7 +705,7 @@ _mesa_ProgramUniformMatrix4fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }


@@ -718,7 +718,7 @@ _mesa_UniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_FLOAT);
+			2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -727,7 +727,7 @@ _mesa_UniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_FLOAT);
+			3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -736,7 +736,7 @@ _mesa_UniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_FLOAT);
+			2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -745,7 +745,7 @@ _mesa_UniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_FLOAT);
+			4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -754,7 +754,7 @@ _mesa_UniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_FLOAT);
+			3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -763,7 +763,7 @@ _mesa_UniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_FLOAT);
+			4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 /** Same as above with direct state access **/
@@ -776,7 +776,7 @@ _mesa_ProgramUniformMatrix2x3fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -787,7 +787,7 @@ _mesa_ProgramUniformMatrix3x2fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -798,7 +798,7 @@ _mesa_ProgramUniformMatrix2x4fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -809,7 +809,7 @@ _mesa_ProgramUniformMatrix4x2fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -820,7 +820,7 @@ _mesa_ProgramUniformMatrix3x4fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -831,7 +831,7 @@ _mesa_ProgramUniformMatrix4x3fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }


@@ -888,16 +888,7 @@ _mesa_GetnUniformdvARB(GLuint program, GLint location,
 {
   GET_CURRENT_CONTEXT(ctx);

-   (void) program;
-   (void) location;
-   (void) bufSize;
-   (void) params;
-
-   /*
   _mesa_get_uniform(ctx, program, location, bufSize, GLSL_TYPE_DOUBLE, params);
-   */
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformdvARB"
-               "(GL_ARB_gpu_shader_fp64 not implemented)");
 }

 void GLAPIENTRY
@@ -1302,7 +1293,7 @@ _mesa_UniformMatrix2dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_DOUBLE);
+			2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1311,7 +1302,7 @@ _mesa_UniformMatrix3dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_DOUBLE);
+			3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1320,7 +1311,7 @@ _mesa_UniformMatrix4dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_DOUBLE);
+			4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1329,7 +1320,7 @@ _mesa_UniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_DOUBLE);
+			2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1338,7 +1329,7 @@ _mesa_UniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_DOUBLE);
+			3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1347,7 +1338,7 @@ _mesa_UniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_DOUBLE);
+			2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1356,7 +1347,7 @@ _mesa_UniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_DOUBLE);
+			4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1365,7 +1356,7 @@ _mesa_UniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_DOUBLE);
+			3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1374,7 +1365,7 @@ _mesa_UniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_DOUBLE);
+			4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1480,7 +1471,7 @@ _mesa_ProgramUniformMatrix2dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1491,7 +1482,7 @@ _mesa_ProgramUniformMatrix3dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1502,7 +1493,7 @@ _mesa_ProgramUniformMatrix4dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1513,7 +1504,7 @@ _mesa_ProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1524,7 +1515,7 @@ _mesa_ProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1535,7 +1526,7 @@ _mesa_ProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1546,7 +1537,7 @@ _mesa_ProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1557,7 +1548,7 @@ _mesa_ProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1568,5 +1559,5 @@ _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -363,7 +363,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                     GLint location, GLsizei count,
                     GLboolean transpose,
-                     const GLvoid *values, GLenum type);
+                     const GLvoid *values, enum glsl_base_type basicType);

 void
 _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2963,6 +2963,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
   if (prog->LinkStatus) {
      if (!ctx->Driver.LinkShader(ctx, prog)) {
 	 prog->LinkStatus = GL_FALSE;
+      } else {
+         build_program_resource_list(ctx, prog);
      }
   }

--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -253,8 +253,8 @@ st_create_texture_sampler_view_from_stobj(struct pipe_context *pipe,
         return NULL;
      size = MIN2(stObj->pt->width0 - base, (unsigned)stObj->base.BufferSize);

-      f = ((base * 8) / desc->block.bits) * desc->block.width;
-      n = ((size * 8) / desc->block.bits) * desc->block.width;
+      f = (base / (desc->block.bits / 8)) * desc->block.width;
+      n = (size / (desc->block.bits / 8)) * desc->block.width;
      if (!n)
         return NULL;
      templ.u.buf.first_element = f;
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -228,6 +228,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
                  st_adjust_blit_for_msaa_resolve(&blit);

                  st->pipe->blit(st->pipe, &blit);
+                  dstRb->defined = true; /* front buffer tracking */
               }
            }
         }
@@ -265,6 +266,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
                  st_adjust_blit_for_msaa_resolve(&blit);

                  st->pipe->blit(st->pipe, &blit);
+                  dstRb->defined = true; /* front buffer tracking */
               }
            }
         }
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -210,9 +210,9 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
      GLuint row;

      for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         GLvoid *dest = _mesa_image_address2d(pack, pixels,
                                              width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
         memcpy(dest, map, bytesPerRow);
         map += tex_xfer->stride;
      }
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -1270,46 +1270,40 @@ static const struct format_mapping format_map[] = {
   /* 32-bit float formats */
   {
      { GL_RGBA32F_ARB, 0 },
-      { PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_RGB32F_ARB, 0 },
      { PIPE_FORMAT_R32G32B32_FLOAT, PIPE_FORMAT_R32G32B32X32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_LUMINANCE_ALPHA32F_ARB, 0 },
-      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_L32A32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_ALPHA32F_ARB, 0 },
      { PIPE_FORMAT_A32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_A16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_INTENSITY32F_ARB, 0 },
      { PIPE_FORMAT_I32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_I16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_LUMINANCE32F_ARB, 0 },
      { PIPE_FORMAT_L32_FLOAT, PIPE_FORMAT_L32A32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_L16_FLOAT,
-        PIPE_FORMAT_L16A16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_R32F, 0 },
      { PIPE_FORMAT_R32_FLOAT, PIPE_FORMAT_R32G32_FLOAT,
-        PIPE_FORMAT_R32G32B32A32_FLOAT, PIPE_FORMAT_R16_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+        PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },
   {
      { GL_RG32F, 0 },
-      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT,
-        PIPE_FORMAT_R16G16_FLOAT, PIPE_FORMAT_R16G16B16A16_FLOAT, 0 }
+      { PIPE_FORMAT_R32G32_FLOAT, PIPE_FORMAT_R32G32B32A32_FLOAT, 0 }
   },

   /* R, RG formats */
@@ -1966,7 +1960,11 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target,
   else if (internalFormat == 3 || internalFormat == 4 ||
            internalFormat == GL_RGB || internalFormat == GL_RGBA ||
            internalFormat == GL_RGB8 || internalFormat == GL_RGBA8 ||
-            internalFormat == GL_BGRA)
+            internalFormat == GL_BGRA ||
+            internalFormat == GL_RGB16F ||
+            internalFormat == GL_RGBA16F ||
+            internalFormat == GL_RGB32F ||
+            internalFormat == GL_RGBA32F)
 	 bindings |= PIPE_BIND_RENDER_TARGET;

   /* GLES allows the driver to choose any format which matches
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2679,7 +2679,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       */
      glsl_to_tgsi_instruction *inst, *new_inst;
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
      new_inst->saturate = inst->saturate;
      inst->dead_mask = inst->dst[0].writemask;
   } else {
@@ -4240,12 +4240,12 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    * new visitor. */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
      glsl_to_tgsi_instruction *newinst;
-      st_src_reg src_regs[3];
+      st_src_reg src_regs[4];

      if (inst->dst[0].file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);

-      for (int i = 0; i < 3; i++) {
+      for (int i = 0; i < 4; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT &&
             src_regs[i].index == VARYING_SLOT_COL0) {
@@ -4256,7 +4256,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

-      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
      newinst->tex_target = inst->tex_target;
      newinst->sampler_array_size = inst->sampler_array_size;
   }
@@ -4325,18 +4325,18 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    * new visitor. */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
      glsl_to_tgsi_instruction *newinst;
-      st_src_reg src_regs[3];
+      st_src_reg src_regs[4];

      if (inst->dst[0].file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);

-      for (int i = 0; i < 3; i++) {
+      for (int i = 0; i < 4; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

-      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
      newinst->tex_target = inst->tex_target;
      newinst->sampler_array_size = inst->sampler_array_size;
   }
--- a/src/mesa/tnl_dd/t_dd_dmatmp.h
+++ b/src/mesa/tnl_dd/t_dd_dmatmp.h
@@ -121,9 +121,9 @@ static void TAG(render_points_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -148,7 +148,7 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,

      /* Emit whole number of lines in total and in each buffer:
       */
-      count -= (count-start) & 1;
+      count -= count & 1;
      currentsz = GET_CURRENT_VB_MAX_VERTS();
      currentsz -= currentsz & 1;
      dmasz -= dmasz & 1;
@@ -156,9 +156,9 @@ static void TAG(render_lines_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -186,9 +186,9 @@ static void TAG(render_line_strip_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j + 1 < count; j += nr - 1 ) {
+      for (j = 0; j + 1 < count; j += nr - 1 ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }
 
@@ -214,10 +214,7 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,

      INIT( GL_LINE_STRIP );

-      if (flags & PRIM_BEGIN)
-	 j = start;
-      else
-	 j = start + 1;
+      j = (flags & PRIM_BEGIN) ? 0 : 1;

      /* Ensure last vertex won't wrap buffers:
       */
@@ -234,23 +231,23 @@ static void TAG(render_line_loop_verts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );

 	    if (j + nr >= count &&
-		start < count - 1 && 
+		count > 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
 	       tmp = ALLOC_VERTS(nr+1);
-	       tmp = TAG(emit_verts)( ctx, j, nr, tmp );
+               tmp = TAG(emit_verts)(ctx, start + j, nr, tmp);
 	       tmp = TAG(emit_verts)( ctx, start, 1, tmp );
 	       (void) tmp;
 	    }
 	    else {
-	       TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+               TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	       currentsz = dmasz;
 	    }
 	 }

      }
-      else if (start + 1 < count && (flags & PRIM_END)) {
+      else if (count > 1 && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_VERTS(2);
 	 tmp = TAG(emit_verts)( ctx, start+1, 1, tmp );
@@ -284,14 +281,14 @@ static void TAG(render_triangles_verts)( struct gl_context *ctx,
   /* Emit whole number of tris in total.  dmasz is already a multiple
    * of 3.
    */
-   count -= (count-start)%3;
+   count -= count % 3;

   if (currentsz < 8)
      currentsz = dmasz;

-   for (j = start; j < count; j += nr) {
+   for (j = 0; j < count; j += nr) {
      nr = MIN2( currentsz, count - j );
-      TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+      TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
      currentsz = dmasz;
   }
 }
@@ -322,9 +319,9 @@ static void TAG(render_tri_strip_verts)( struct gl_context *ctx,
      dmasz -= (dmasz & 1);
      currentsz -= (currentsz & 1);

-      for (j = start ; j + 2 < count; j += nr - 2 ) {
+      for (j = 0; j + 2 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -354,12 +351,12 @@ static void TAG(render_tri_fan_verts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_VERTS( nr );
 	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
+         tmp = TAG(emit_verts)( ctx, start + j, nr - 1, tmp );
 	 (void) tmp;
 	 currentsz = dmasz;
      }
@@ -394,12 +391,12 @@ static void TAG(render_poly_verts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count ; j += nr - 2 ) {
+      for (j = 1 ; j + 1 < count ; j += nr - 2 ) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_VERTS( nr );
 	 tmp = TAG(emit_verts)( ctx, start, 1, tmp );
-	 tmp = TAG(emit_verts)( ctx, j, nr - 1, tmp );
+         tmp = TAG(emit_verts)(ctx, start + j, nr - 1, tmp);
 	 (void) tmp;
 	 currentsz = dmasz;
      }
@@ -437,9 +434,9 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
      dmasz -= (dmasz & 2);
      currentsz -= (currentsz & 2);

-      for (j = start ; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2 ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -465,7 +462,7 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 /* Emit whole number of quads in total, and in each buffer.
 	  */
 	 dmasz -= dmasz & 1;
-	 count -= (count-start) & 1;
+	 count -= count & 1;
 	 currentsz -= currentsz & 1;

 	 if (currentsz < 12)
@@ -474,14 +471,14 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
 	 currentsz = currentsz/6*2;
 	 dmasz = dmasz/6*2;

-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+	 for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
 	    if (nr >= 4) {
 	       GLint quads = (nr/2)-1;
 	       GLint i;
 	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );

-	       for ( i = j-start ; i < j-start+quads*2 ; i+=2 ) {
+               for (i = j; i < j + quads * 2; i += 2) {
 		  EMIT_TWO_ELTS( 0, (i+0), (i+1) );
 		  EMIT_TWO_ELTS( 2, (i+2), (i+1) );
 		  EMIT_TWO_ELTS( 4, (i+3), (i+2) );
@@ -519,15 +516,15 @@ static void TAG(render_quad_strip_verts)( struct gl_context *ctx,
      dmasz -= dmasz & 1;
      currentsz = GET_CURRENT_VB_MAX_VERTS();
      currentsz -= currentsz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;

      if (currentsz < 8) {
 	 currentsz = dmasz;
      }

-      for (j = start; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
 	 currentsz = dmasz;
      }

@@ -545,6 +542,9 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
 				     GLuint count,
 				     GLuint flags )
 {
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
+
   if (HAVE_QUADS) {
      LOCAL_VARS;
      int dmasz = (GET_SUBSEQUENT_VB_MAX_VERTS()/4) * 4;
@@ -553,18 +553,13 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,

      INIT(GL_QUADS);

-      /* Emit whole number of quads in total.  dmasz is already a multiple
-       * of 4.
-       */
-      count -= (count-start)%4;
-
      currentsz = (GET_CURRENT_VB_MAX_VERTS()/4) * 4;
      if (currentsz < 8)
         currentsz = dmasz;

-      for (j = start; j < count; j += nr) {
+      for (j = 0; j < count; j += nr) {
         nr = MIN2( currentsz, count - j );
-         TAG(emit_verts)( ctx, j, nr, ALLOC_VERTS(nr) );
+         TAG(emit_verts)(ctx, start + j, nr, ALLOC_VERTS(nr));
         currentsz = dmasz;
      }
   }
@@ -587,7 +582,6 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
      /* Emit whole number of quads in total, and in each buffer.
       */
      dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
      currentsz -= currentsz & 3;

      /* Adjust for rendering as triangles:
@@ -598,14 +592,14 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
 	 if (nr >= 4) {
 	    GLint quads = nr/4;
 	    GLint i;
 	    ELTS_VARS( ALLOC_ELTS( quads*6 ) );

-	    for ( i = j-start ; i < j-start+quads*4 ; i+=4 ) {
+            for (i = j; i < j + quads * 4; i += 4) {
 	       EMIT_TWO_ELTS( 0, (i+0), (i+1) );
 	       EMIT_TWO_ELTS( 2, (i+3), (i+1) );
 	       EMIT_TWO_ELTS( 4, (i+2), (i+3) );
@@ -629,15 +623,15 @@ static void TAG(render_quads_verts)( struct gl_context *ctx,

      INIT(GL_TRIANGLES);

-      for (j = start; j < count-3; j += 4) {
+      for (j = 0; j + 3 < count; j += 4) {
 	 void *tmp = ALLOC_VERTS( 6 );
 	 /* Send v0, v1, v3
 	  */
-	 tmp = EMIT_VERTS(ctx, j,     2, tmp);
-	 tmp = EMIT_VERTS(ctx, j + 3, 1, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j,     2, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j + 3, 1, tmp);
 	 /* Send v1, v2, v3
 	  */
-	 tmp = EMIT_VERTS(ctx, j + 1, 3, tmp);
+	 tmp = EMIT_VERTS(ctx, start + j + 1, 3, tmp);
 	 (void) tmp;
      }
   }
@@ -698,9 +692,9 @@ static void TAG(render_points_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -728,7 +722,7 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,

      /* Emit whole number of lines in total and in each buffer:
       */
-      count -= (count-start) & 1;
+      count -= count & 1;
      currentsz -= currentsz & 1;
      dmasz -= dmasz & 1;

@@ -736,9 +730,9 @@ static void TAG(render_lines_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr ) {
+      for (j = 0; j < count; j += nr ) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -768,9 +762,9 @@ static void TAG(render_line_strip_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j + 1 < count; j += nr - 1 ) {
+      for (j = 0; j + 1 < count; j += nr - 1) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -798,10 +792,7 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
      FLUSH();
      ELT_INIT( GL_LINE_STRIP );

-      if (flags & PRIM_BEGIN)
-	 j = start;
-      else
-	 j = start + 1;
+      j = (flags & PRIM_BEGIN) ? 0 : 1;

      currentsz = GET_CURRENT_VB_MAX_ELTS();
      if (currentsz < 8) {
@@ -818,23 +809,23 @@ static void TAG(render_line_loop_elts)( struct gl_context *ctx,
 	    nr = MIN2( currentsz, count - j );

 	    if (j + nr >= count &&
-		start < count - 1 && 
+		count > 1 &&
 		(flags & PRIM_END)) 
 	    {
 	       void *tmp;
 	       tmp = ALLOC_ELTS(nr+1);
-	       tmp = TAG(emit_elts)( ctx, elts+j, nr, tmp );
+               tmp = TAG(emit_elts)(ctx, elts + start + j, nr, tmp);
 	       tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
 	       (void) tmp;
 	    }
 	    else {
-	       TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+               TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	       currentsz = dmasz;
 	    }
 	 }

      }
-      else if (start + 1 < count && (flags & PRIM_END)) {
+      else if (count > 1 && (flags & PRIM_END)) {
 	 void *tmp;
 	 tmp = ALLOC_ELTS(2);
 	 tmp = TAG(emit_elts)( ctx, elts+start+1, 1, tmp );
@@ -874,14 +865,14 @@ static void TAG(render_triangles_elts)( struct gl_context *ctx,
   /* Emit whole number of tris in total.  dmasz is already a multiple
    * of 3.
    */
-   count -= (count-start)%3;
+   count -= count % 3;
   currentsz -= currentsz%3;
   if (currentsz < 8)
      currentsz = dmasz;

-   for (j = start; j < count; j += nr) {
+   for (j = 0; j < count; j += nr) {
      nr = MIN2( currentsz, count - j );
-      TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+      TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
      FLUSH();
      currentsz = dmasz;
   }
@@ -914,9 +905,9 @@ static void TAG(render_tri_strip_elts)( struct gl_context *ctx,
      dmasz -= (dmasz & 1);
      currentsz -= (currentsz & 1);

-      for (j = start ; j + 2 < count; j += nr - 2 ) {
+      for (j = 0; j + 2 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+	 TAG(emit_elts)( ctx, elts + start + j, nr, ALLOC_ELTS(nr) );
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -947,12 +938,12 @@ static void TAG(render_tri_fan_elts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_ELTS( nr );
 	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
+         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
 	 (void) tmp;
 	 FLUSH();
 	 currentsz = dmasz;
@@ -985,12 +976,12 @@ static void TAG(render_poly_elts)( struct gl_context *ctx,
 	 currentsz = dmasz;
      }

-      for (j = start + 1 ; j + 1 < count; j += nr - 2 ) {
+      for (j = 1 ; j + 1 < count; j += nr - 2) {
 	 void *tmp;
 	 nr = MIN2( currentsz, count - j + 1 );
 	 tmp = ALLOC_ELTS( nr );
 	 tmp = TAG(emit_elts)( ctx, elts+start, 1, tmp );
-	 tmp = TAG(emit_elts)( ctx, elts+j, nr - 1, tmp );
+         tmp = TAG(emit_elts)(ctx, elts + start + j, nr - 1, tmp);
 	 (void) tmp;
 	 FLUSH();
 	 currentsz = dmasz;
@@ -1023,7 +1014,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
      /* Emit whole number of quads in total, and in each buffer.
       */
      dmasz -= dmasz & 1;
-      count -= (count-start) & 1;
+      count -= count & 1;
      currentsz -= currentsz & 1;

      if (currentsz < 12)
@@ -1035,7 +1026,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	 currentsz = currentsz/6*2;
 	 dmasz = dmasz/6*2;

-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+         for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );

 	    if (nr >= 4)
@@ -1044,7 +1035,7 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
 	       GLint quads = (nr/2)-1;
 	       ELTS_VARS( ALLOC_ELTS( quads*6 ) );

-	       for ( i = j-start ; i < j-start+quads ; i++, elts += 2 ) {
+               for (i = j; i < j + quads; i++, elts += 2) {
 		  EMIT_TWO_ELTS( 0, elts[0], elts[1] );
 		  EMIT_TWO_ELTS( 2, elts[2], elts[1] );
 		  EMIT_TWO_ELTS( 4, elts[3], elts[2] );
@@ -1060,9 +1051,9 @@ static void TAG(render_quad_strip_elts)( struct gl_context *ctx,
      else {
 	 ELT_INIT( GL_TRIANGLE_STRIP );

-	 for (j = start; j + 3 < count; j += nr - 2 ) {
+         for (j = 0; j + 3 < count; j += nr - 2) {
 	    nr = MIN2( currentsz, count - j );
-	    TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+            TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	    FLUSH();
 	    currentsz = dmasz;
 	 }
@@ -1076,6 +1067,9 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 				    GLuint count,
 				    GLuint flags )
 {
+   /* Emit whole number of quads in total. */
+   count -= count & 3;
+
   if (HAVE_QUADS) {
      LOCAL_VARS;
      GLuint *elts = TNL_CONTEXT(ctx)->vb.Elts;
@@ -1088,14 +1082,12 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,

      currentsz = GET_CURRENT_VB_MAX_ELTS()/4*4;

-      count -= (count-start)%4;
-
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j < count; j += nr) {
+      for (j = 0; j < count; j += nr) {
 	 nr = MIN2( currentsz, count - j );
-	 TAG(emit_elts)( ctx, elts+j, nr, ALLOC_ELTS(nr) );
+         TAG(emit_elts)(ctx, elts + start + j, nr, ALLOC_ELTS(nr));
 	 FLUSH();
 	 currentsz = dmasz;
      }
@@ -1112,7 +1104,6 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
      /* Emit whole number of quads in total, and in each buffer.
       */
      dmasz -= dmasz & 3;
-      count -= (count-start) & 3;
      currentsz -= currentsz & 3;

      /* Adjust for rendering as triangles:
@@ -1123,7 +1114,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
      if (currentsz < 8)
 	 currentsz = dmasz;

-      for (j = start; j + 3 < count; j += nr - 2 ) {
+      for (j = 0; j + 3 < count; j += nr - 2) {
 	 nr = MIN2( currentsz, count - j );

 	 if (nr >= 4)
@@ -1132,7 +1123,7 @@ static void TAG(render_quads_elts)( struct gl_context *ctx,
 	    GLint i;
 	    ELTS_VARS( ALLOC_ELTS( quads * 6 ) );

-	    for ( i = j-start ; i < j-start+quads ; i++, elts += 4 ) {
+	    for (i = j; i < j + quads; i++, elts += 4) {
 	       EMIT_TWO_ELTS( 0, elts[0], elts[1] );
 	       EMIT_TWO_ELTS( 2, elts[3], elts[1] );
 	       EMIT_TWO_ELTS( 4, elts[2], elts[3] );
@@ -1 +1 @@
 .6.5
 .6.9