docs: add release notes for 10.6.6

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 10.6.6
2015-09-04 22:36:23 +01:00 · 2015-09-04 22:36:23 +01:00 · 2015-09-04 21:20:05 +01:00 · 2015-09-04 21:19:25 +01:00 · 2015-09-04 21:18:36 +01:00 · 2015-09-04 21:16:00 +01:00
59 changed files with 780 additions and 191 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-10.6.4
+10.6.6
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*10\.6.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -1515,6 +1515,10 @@ if test "x$enable_nine" = xyes; then
    if test "x$with_gallium_drivers" = xswrast; then
        AC_MSG_ERROR([nine requires at least one non-swrast gallium driver])
    fi
+    if test $GCC_VERSION_MAJOR -lt 4 -o $GCC_VERSION_MAJOR -eq 4 -a $GCC_VERSION_MINOR -lt 6; then
+        AC_MSG_ERROR([gcc >= 4.6 is required to build nine])
+    fi
+
    if test "x$enable_dri3" = xno; then
        AC_MSG_WARN([using nine together with wine requires DRI3 enabled system])
    fi
--- a/docs/relnotes/10.6.4.html
+++ b/docs/relnotes/10.6.4.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+4960bf17d8b5d6a6503c6954ec6cf480b5cd930797bac901c60bea192675f85e  mesa-10.6.4.tar.gz
+8f5ac103f0f503de2f7a985b0df349bd4ecdfe7f51c714be146fa5a9a3c07b77  mesa-10.6.4.tar.xz
 </pre>


--- a/docs/relnotes/10.6.5.html
+++ b/docs/relnotes/10.6.5.html
@@ -0,0 +1,124 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.5 Release Notes / August 22, 2015</h1>
+
+<p>
+Mesa 10.6.5 is a bug fix release which fixes bugs found since the 10.6.4 release.
+</p>
+<p>
+Mesa 10.6.5 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+afe290fc7af75a25df5ee52396a9f09e5dba85fb3e159304bdda265b8564b0d4  mesa-10.6.5.tar.gz
+fb6fac3c85bcfa9d06b8dd439169f23f0c0924a88e44362e738b99b1feff762f  mesa-10.6.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (1):</p>
+<ul>
+  <li>glx: Fix __glXWireToEvent for BufferSwapComplete</li>
+</ul>
+
+<p>Alex Deucher (2):</p>
+<ul>
+  <li>radeonsi: add new OLAND pci id</li>
+  <li>radeonsi: properly set the raster_config for KV</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.4</li>
+  <li>vc4: add missing nir include, to fix the build</li>
+  <li>Revert "radeonsi: properly set the raster_config for KV"</li>
+  <li>Update version to 10.6.5</li>
+</ul>
+
+<p>Frank Binns (1):</p>
+<ul>
+  <li>egl/x11: don't abort when creating a DRI2 drawable fails</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>nouveau: no need to do tnl wakeup, state updates are always hooked up</li>
+  <li>gm107/ir: indirect handle goes first on maxwell also</li>
+  <li>nv50,nvc0: take level into account when doing eng2d multi-layer blits</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>meta/copy_image: Stash off the scissor</li>
+  <li>mesa/formats: Only do byteswapping for packed formats</li>
+  <li>mesa/formats: Fix swizzle flipping for big-endian targets</li>
+  <li>mesa/formats: Don't flip channels of null array formats</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>radeonsi: fix polygon offset scale</li>
+  <li>r600g: fix polygon offset scale</li>
+  <li>r600g: allow setting geometry shader sampler states</li>
+</ul>
+
+<p>Neil Roberts (1):</p>
+<ul>
+  <li>i965/bdw: Fix setting the instancing state for the SGVS element</li>
+</ul>
+
+<p>Oded Gabbay (2):</p>
+<ul>
+  <li>mesa: clear existing swizzle info before bitwise-OR</li>
+  <li>mesa/formats: don't byteswap when building array formats</li>
+</ul>
+
+<p>Renaud Gaubert (1):</p>
+<ul>
+  <li>glsl: avoid compiler's segfault when processing operators with void arguments</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.6.html
+++ b/docs/relnotes/10.6.6.html
@@ -0,0 +1,163 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+  <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+  <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+  <li>mesa/texgetimage: fix missing stencil check</li>
+  <li>st/readpixels: fix accel path for skipimages.</li>
+  <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+  <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+  <li>mesa: enable texture stencil8 for multisample</li>
+  <li>r600/sb: update last_cf for finalize if.</li>
+  <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+  <li>st/nine: Require gcc &gt;= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.5</li>
+  <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+  <li>r600g: Fix assert in tgsi_cmp</li>
+  <li>r600g/sb: Handle undef in read port tracker</li>
+  <li>r600g/sb: Don't read junk after EOP</li>
+  <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+  <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+  <li>nv50,nvc0: disable depth bounds test on blit</li>
+  <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+  <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix copy propagation type changes.</li>
+  <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+  <li>mesa: create multisample fallback textures like normal textures</li>
+  <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+  <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+  <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>mesa: update fbo state in glTexStorage</li>
+  <li>glsl: build stageref mask using IR, not symbol table</li>
+  <li>glsl: expose build_program_resource_list function</li>
+  <li>glsl: create program resource list after LinkShader</li>
+  <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -63,6 +63,7 @@ CHIPSET(0x6608, OLAND_6608, OLAND)
 CHIPSET(0x6610, OLAND_6610, OLAND)
 CHIPSET(0x6611, OLAND_6611, OLAND)
 CHIPSET(0x6613, OLAND_6613, OLAND)
+CHIPSET(0x6617, OLAND_6617, OLAND)
 CHIPSET(0x6620, OLAND_6620, OLAND)
 CHIPSET(0x6621, OLAND_6621, OLAND)
 CHIPSET(0x6623, OLAND_6623, OLAND)
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -273,7 +273,25 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
   }

   if (dri2_dpy->dri2) {
-      xcb_dri2_create_drawable (dri2_dpy->conn, dri2_surf->drawable);
+      xcb_void_cookie_t cookie;
+      int conn_error;
+
+      cookie = xcb_dri2_create_drawable_checked(dri2_dpy->conn,
+                                                dri2_surf->drawable);
+      error = xcb_request_check(dri2_dpy->conn, cookie);
+      conn_error = xcb_connection_has_error(dri2_dpy->conn);
+      if (conn_error || error != NULL) {
+         if (type == EGL_PBUFFER_BIT || conn_error || error->error_code == BadAlloc)
+            _eglError(EGL_BAD_ALLOC, "xcb_dri2_create_drawable_checked");
+         else if (type == EGL_WINDOW_BIT)
+            _eglError(EGL_BAD_NATIVE_WINDOW,
+                      "xcb_dri2_create_drawable_checked");
+         else
+            _eglError(EGL_BAD_NATIVE_PIXMAP,
+                      "xcb_dri2_create_drawable_checked");
+         free(error);
+         goto cleanup_dri_drawable;
+      }
   } else {
      if (type == EGL_PBUFFER_BIT) {
         dri2_surf->depth = _eglGetConfigKey(conf, EGL_BUFFER_SIZE);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -987,14 +987,10 @@ NVC0LoweringPass::handleTXQ(TexInstruction *txq)
      txq->tex.r = 0xff;
      txq->tex.s = 0x1f;

-      if (chipset < NVISA_GM107_CHIPSET) {
-         txq->setIndirectR(NULL);
-         txq->moveSources(0, 1);
-         txq->setSrc(0, hnd);
-         txq->tex.rIndirectSrc = 0;
-      } else {
-         txq->setIndirectR(hnd);
-      }
+      txq->setIndirectR(NULL);
+      txq->moveSources(0, 1);
+      txq->setSrc(0, hnd);
+      txq->tex.rIndirectSrc = 0;
   }

   return true;
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
      return NV50_SURFACE_FORMAT_R16_UNORM;
   case 4:
      return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+   case 8:
+      return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+   case 16:
+      return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
   default:
      return 0;
   }
@@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
   /* zsa state */
   BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
+   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
@@ -1387,18 +1393,24 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
            PUSH_DATA (push, info->dst.box.z + i);
         } else {
            const unsigned z = info->dst.box.z + i;
+            const uint64_t address = dst->base.address +
+               dst->level[info->dst.level].offset +
+               z * dst->layer_stride;
            BEGIN_NV04(push, NV50_2D(DST_ADDRESS_HIGH), 2);
-            PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
-            PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+            PUSH_DATAh(push, address);
+            PUSH_DATA (push, address);
         }
         if (src->layout_3d) {
            /* not possible because of depth tiling */
            assert(0);
         } else {
            const unsigned z = info->src.box.z + i;
+            const uint64_t address = src->base.address +
+               src->level[info->src.level].offset +
+               z * src->layer_stride;
            BEGIN_NV04(push, NV50_2D(SRC_ADDRESS_HIGH), 2);
-            PUSH_DATAh(push, src->base.address + z * src->layer_stride);
-            PUSH_DATA (push, src->base.address + z * src->layer_stride);
+            PUSH_DATAh(push, address);
+            PUSH_DATA (push, address);
         }
         BEGIN_NV04(push, NV50_2D(BLIT_SRC_Y_INT), 1); /* trigger */
         PUSH_DATA (push, srcy >> 32);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)

   /* zsa state */
   IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+   IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
   IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
   IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);

@@ -1336,18 +1337,24 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
            PUSH_DATA (push, info->dst.box.z + i);
         } else {
            const unsigned z = info->dst.box.z + i;
+            const uint64_t address = dst->base.address +
+               dst->level[info->dst.level].offset +
+               z * dst->layer_stride;
            BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2);
-            PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
-            PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+            PUSH_DATAh(push, address);
+            PUSH_DATA (push, address);
         }
         if (src->layout_3d) {
            /* not possible because of depth tiling */
            assert(0);
         } else {
            const unsigned z = info->src.box.z + i;
+            const uint64_t address = src->base.address +
+               src->level[info->src.level].offset +
+               z * src->layer_stride;
            BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2);
-            PUSH_DATAh(push, src->base.address + z * src->layer_stride);
-            PUSH_DATA (push, src->base.address + z * src->layer_stride);
+            PUSH_DATAh(push, address);
+            PUSH_DATA (push, address);
         }
         BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */
         PUSH_DATA (push, srcy >> 32);
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -485,7 +485,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,

 	/* offset */
 	rs->offset_units = state->offset_units;
-	rs->offset_scale = state->offset_scale * 12.0f;
+	rs->offset_scale = state->offset_scale * 16.0f;
 	rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;

 	if (state->point_size_per_vertex) {
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 					fprintf(stderr, "CND:%X ", cf->cond);
 				if (cf->pop_count)
 					fprintf(stderr, "POP:%X ", cf->pop_count);
+				if (cf->end_of_program)
+					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			}
 		}
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6143,10 +6143,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
 		if (r)
 			return r;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -473,7 +473,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,

 	/* offset */
 	rs->offset_units = state->offset_units;
-	rs->offset_scale = state->offset_scale * 12.0f;
+	rs->offset_scale = state->offset_scale * 16.0f;
 	rs->offset_enable = state->offset_point || state->offset_line || state->offset_tri;

 	if (state->point_size_per_vertex) {
@@ -2051,7 +2051,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
 			/* always privilege vs stage so that at worst we have the
 			 * pixel stage producing wrong output (not the vertex
 			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
 			new_num_vs_gprs = num_vs_gprs;
 			new_num_gs_gprs = num_gs_gprs;
 			new_num_es_gprs = num_es_gprs;
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -399,11 +399,6 @@ static void r600_bind_sampler_states(struct pipe_context *pipe,

 	assert(start == 0); /* XXX fix below */

-	if (shader != PIPE_SHADER_VERTEX &&
-	    shader != PIPE_SHADER_FRAGMENT) {
-		return;
-	}
-
 	for (i = 0; i < count; i++) {
 		struct r600_pipe_sampler_state *rstate = rstates[i];

--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
 	int r = 0;
 	uint32_t dw0 = dw[i];
 	uint32_t dw1 = dw[i+1];
+	assert(i+1 <= ndw);

 	if ((dw1 >> 29) & 1) { // CF_ALU
 		return decode_cf_alu(i, bc);
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
 		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
 		cf_node *if_pop = sh.create_cf(CF_OP_POP);

+		if (!last_cf || last_cf->get_parent_region() == r) {
+			last_cf = if_pop;
+		}
 		if_pop->bc.pop_count = 1;
 		if_pop->jump_after(if_pop);

--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
 		if ((r = decode_cf(i, eop)))
 			return r;

-	} while (!eop || (i >> 1) <= max_cf);
+	} while (!eop || (i >> 1) < max_cf);

 	return 0;
 }
@@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
 }

 int bc_parser::prepare_loop(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());

 	cf_node *end = cf_map[c->bc.addr - 1];
 	assert(end->bc.op == CF_OP_LOOP_END);
@@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
 }

 int bc_parser::prepare_if(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());
 	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

+	if (!end)
+		return 0; // not quite sure how this happens, malformed input?
+
 	BCP_DUMP(
 		sblog << "parsing JUMP @" << c->bc.id;
 		sblog << "\n";
@@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
 	if (c_else->parent != c->parent)
 		c_else = NULL;

-	if (end->parent != c->parent)
+	if (end && end->parent != c->parent)
 		end = NULL;

 	region_node *reg = sh->create_region();
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {

 	for (i = 0; i < nsrc; ++i) {
 		value *v = n->src[i];
-		if (v->is_readonly())
+		if (v->is_readonly() || v->is_undef())
 			continue;
 		if (i == 1 && opt)
 			continue;
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -184,7 +184,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -193,13 +193,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -207,7 +207,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
@@ -240,7 +240,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -250,7 +250,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += query->buffer.results_end + query->result_size/2;
@@ -259,7 +259,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -268,7 +268,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -116,6 +116,7 @@ struct si_shader_selector {
 	unsigned	gs_output_prim;
 	unsigned	gs_max_out_vertices;
 	uint64_t	gs_used_inputs; /* mask of "get_unique_index" bits */
+	uint32_t	ps_colors_written;
 };

 union si_shader_key {
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -29,6 +29,7 @@
 #include "sid.h"
 #include "radeon/r600_cs.h"

+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
@@ -218,8 +219,10 @@ static unsigned si_pack_float_12p4(float x)
 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
 *   so COLOR1 is enabled pretty much all the time.
 * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
 */
-static void si_update_fb_blend_state(struct si_context *sctx)
+void si_update_fb_blend_state(struct si_context *sctx)
 {
 	struct si_pm4_state *pm4;
 	struct si_state_blend *blend = sctx->queued.named.blend;
@@ -237,6 +240,16 @@ static void si_update_fb_blend_state(struct si_context *sctx)
 			mask |= 0xf << (4*i);
 	mask &= blend->cb_target_mask;

+	/* Avoid a hang that happens when dual source blending is enabled
+	 * but there is not enough color outputs. This is undefined behavior,
+	 * so disable color writes completely.
+	 *
+	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
+	 */
+	if (blend->dual_src_blend &&
+	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+		mask = 0;
+
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 	si_pm4_set_state(sctx, fb_blend, pm4);
 }
@@ -328,6 +341,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		return NULL;

 	blend->alpha_to_one = state->alpha_to_one;
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);

 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
@@ -634,7 +648,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,

 	/* offset */
 	rs->offset_units = state->offset_units;
-	rs->offset_scale = state->offset_scale * 12.0f;
+	rs->offset_scale = state->offset_scale * 16.0f;

 	tmp = S_0286D4_FLAT_SHADE_ENA(1);
 	if (state->sprite_coord_enable) {
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,6 +39,7 @@ struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };

 struct si_state_sample_mask {
@@ -247,6 +248,7 @@ void si_shader_change_notify(struct si_context *sctx);
 /* si_state.c */
 struct si_shader_selector;

+void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                               enum pipe_format format,
                               enum pipe_texture_target target,
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -493,6 +493,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 					1llu << si_shader_io_get_unique_index(name, index);
 			}
 		}
+		break;
+	case PIPE_SHADER_FRAGMENT:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			if (name == TGSI_SEMANTIC_COLOR)
+				sel->ps_colors_written |= 1 << index;
+		}
+		break;
 	}

 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -574,6 +584,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	}

 	sctx->ps_shader = sel;
+	si_update_fb_blend_state(sctx);
 }

 static void si_delete_shader_selector(struct pipe_context *ctx,
--- a/src/gallium/drivers/vc4/Makefile.am
+++ b/src/gallium/drivers/vc4/Makefile.am
@@ -30,6 +30,7 @@ SIM_LDFLAGS = -lsimpenrose
 endif

 AM_CFLAGS = \
+	-I$(top_builddir)/src/glsl/nir \
 	$(LIBDRM_CFLAGS) \
 	$(GALLIUM_DRIVER_CFLAGS) \
 	$(SIM_CFLAGS) \
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -1773,7 +1773,14 @@ ast_function_expression::hir(exec_list *instructions,
 	 /* an error has already been emitted */
 	 value = ir_rvalue::error_value(ctx);
      } else {
-	 value = generate_call(instructions, sig, &actual_parameters, state);
+         value = generate_call(instructions, sig, &actual_parameters, state);
+         if (!value) {
+            ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type,
+                                                          "void_var",
+                                                          ir_var_temporary);
+            instructions->push_tail(tmp);
+            value = new(ctx) ir_dereference_variable(tmp);
+         }
      }

      return value;
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -1270,7 +1270,14 @@ ast_expression::do_hir(exec_list *instructions,
       *    applied to one operand that can make them match, in which
       *    case this conversion is done."
       */
-      if ((!apply_implicit_conversion(op[0]->type, op[1], state)
+
+      if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) {
+         _mesa_glsl_error(& loc, state, "`%s':  wrong operand types: "
+                         "no operation `%1$s' exists that takes a left-hand "
+                         "operand of type 'void' or a right operand of type "
+                         "'void'", (this->oper == ast_equal) ? "==" : "!=");
+         error_emitted = true;
+      } else if ((!apply_implicit_conversion(op[0]->type, op[1], state)
           && !apply_implicit_conversion(op[1]->type, op[0], state))
          || (op[0]->type != op[1]->type)) {
         _mesa_glsl_error(& loc, state, "operands of `%s' must have the same "
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2617,9 +2617,17 @@ build_stageref(struct gl_shader_program *shProg, const char *name)
      struct gl_shader *sh = shProg->_LinkedShaders[i];
      if (!sh)
         continue;
-      ir_variable *var = sh->symbols->get_variable(name);
-      if (var)
-         stages |= (1 << i);
+
+      /* Shader symbol table may contain variables that have
+       * been optimized away. Search IR for the variable instead.
+       */
+      foreach_in_list(ir_instruction, node, sh->ir) {
+         ir_variable *var = node->as_variable();
+         if (var && strcmp(var->name, name) == 0) {
+            stages |= (1 << i);
+            break;
+         }
+      }
   }
   return stages;
 }
@@ -2674,7 +2682,7 @@ add_interface_variables(struct gl_shader_program *shProg,
 * Builds up a list of program resources that point to existing
 * resource data.
 */
-static void
+void
 build_program_resource_list(struct gl_context *ctx,
                            struct gl_shader_program *shProg)
 {
@@ -3227,10 +3235,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
      }
   }

-   build_program_resource_list(ctx, prog);
-   if (!prog->LinkStatus)
-      goto done;
-
   /* FINISHME: Assign fragment shader output locations. */

 done:
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -39,6 +39,10 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
 extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);

+extern void
+build_program_resource_list(struct gl_context *ctx,
+                            struct gl_shader_program *shProg);
+
 extern void
 linker_error(struct gl_shader_program *prog, const char *fmt, ...)
   PRINTFLIKE(2, 3);
--- a/src/glx/glxext.c
+++ b/src/glx/glxext.c
@@ -138,6 +138,9 @@ __glXWireToEvent(Display *dpy, XEvent *event, xEvent *wire)
      if (!glxDraw)
 	 return False;

+      aevent->serial = _XSetLastRequestRead(dpy, (xGenericReply *) wire);
+      aevent->send_event = (awire->type & 0x80) != 0;
+      aevent->display = dpy;
      aevent->event_type = awire->event_type;
      aevent->drawable = glxDraw->xDrawable;
      aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo;
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -138,8 +138,8 @@ _mesa_meta_CopyImageSubData_uncompressed(struct gl_context *ctx,
         goto cleanup;
   }

-   /* We really only need to stash the bound framebuffers. */
-   _mesa_meta_begin(ctx, 0);
+   /* We really only need to stash the bound framebuffers and scissor. */
+   _mesa_meta_begin(ctx, MESA_META_SCISSOR);

   _mesa_GenFramebuffers(2, fbos);
   _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -428,11 +428,7 @@ brw_initialize_context_constants(struct brw_context *brw)

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
-   if (brw->gen >= 9 || brw->is_cherryview) {
-      ctx->Const.MaxLineWidth = 40.0;
-      ctx->Const.MaxLineWidthAA = 40.0;
-      ctx->Const.LineWidthGranularity = 0.125;
-   } else if (brw->gen >= 6) {
+   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -787,21 +787,6 @@ static void brw_emit_vertices(struct brw_context *brw)
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

-   if (brw->gen >= 6 && gen6_edgeflag_input) {
-      uint32_t format =
-         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
-
-      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
-                GEN6_VE0_VALID |
-                GEN6_VE0_EDGE_FLAG_ENABLE |
-                (format << BRW_VE0_FORMAT_SHIFT) |
-                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
-
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
@@ -842,6 +827,21 @@ static void brw_emit_vertices(struct brw_context *brw)
      OUT_BATCH(dw1);
   }

+   if (brw->gen >= 6 && gen6_edgeflag_input) {
+      uint32_t format =
+         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+
+      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
+                GEN6_VE0_VALID |
+                GEN6_VE0_EDGE_FLAG_ENABLE |
+                (format << BRW_VE0_FORMAT_SHIFT) |
+                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
+
   ADVANCE_BATCH();
 }

--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3607,7 +3607,8 @@ fs_visitor::lower_integer_multiplication()
          * schedule multi-component multiplications much better.
          */

-         if (inst->conditional_mod && inst->dst.is_null()) {
+         fs_reg orig_dst = inst->dst;
+         if (orig_dst.is_null() || orig_dst.file == MRF) {
            inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                               inst->dst.type, dispatch_width);
         }
@@ -3673,9 +3674,8 @@ fs_visitor::lower_integer_multiplication()

         insert(ADD(dst, low, high));

-         if (inst->conditional_mod) {
-            fs_reg null(retype(brw_null_reg(), inst->dst.type));
-            fs_inst *mov = MOV(null, inst->dst);
+         if (inst->conditional_mod || orig_dst.file == MRF) {
+            fs_inst *mov = MOV(orig_dst, inst->dst);
            mov->conditional_mod = inst->conditional_mod;
            insert(mov);
         }
@@ -4098,12 +4098,12 @@ fs_visitor::calculate_register_pressure()
 void
 fs_visitor::optimize()
 {
-   split_virtual_grfs();
-
   move_uniform_array_access_to_pull_constants();
   assign_constant_locations();
   demote_pull_constants();

+   split_virtual_grfs();
+
 #define OPT(pass, args...) ({                                           \
      pass_num++;                                                       \
      bool this_progress = pass(args);                                  \
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,7 @@ static bool
 can_change_source_types(fs_inst *inst)
 {
   return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
          (inst->opcode == BRW_OPCODE_MOV ||
           (inst->opcode == BRW_OPCODE_SEL &&
            inst->predicate != BRW_PREDICATE_NONE &&
--- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
@@ -414,6 +414,12 @@ brw_meta_stencil_blit(struct brw_context *brw,
   GLenum target;

   _mesa_meta_fb_tex_blit_begin(ctx, &blit);
+   /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
+    * returns a valid format.  When we properly support the extension, we
+    * should remove this.
+    */
+   assert(ctx->Extensions.ARB_texture_stencil8 == false);
+   ctx->Extensions.ARB_texture_stencil8 = true;

   _mesa_GenFramebuffers(1, &fbo);
   /* Force the surface to be configured for level zero. */
@@ -451,6 +457,7 @@ brw_meta_stencil_blit(struct brw_context *brw,
   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);

 error:
+   ctx->Extensions.ARB_texture_stencil8 = false;
   _mesa_meta_fb_tex_blit_end(ctx, target, &blit);
   _mesa_meta_end(ctx);

--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -879,7 +879,8 @@ brw_upload_invariant_state(struct brw_context *brw)
 {
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;

-   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw->last_pipeline = BRW_RENDER_PIPELINE;

   if (brw->gen < 6) {
      /* Disable depth offset clamping. */
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw)
 {
   struct gl_context *ctx = &brw->ctx;
   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+   bool uses_edge_flag;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

+   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+                     ctx->Polygon.BackMode != GL_FILL);
+
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

-      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
-                "Using VID/IID with edgeflags, need to reorder the "
-                "vertex attributes");
+      /* The element for the edge flags must always be last, so we have to
+       * insert the SGVS before it in that case.
+       */
+      if (uses_edge_flag) {
+         assert(vue > 0);
+         vue--;
+      }
+
      WARN_ONCE(vue >= 33,
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");
@@ -74,7 +83,7 @@ gen8_emit_vertices(struct brw_context *brw)

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(brw->vb.nr_buffers | GEN8_VF_INSTANCING_ENABLE);
+      OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
@@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw)
      ADVANCE_BATCH();
   }

-   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   /* Normally we don't need an element for the SGVS attribute because the
+    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
+    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
+    * vertex ID is used then it needs an element for the base vertex buffer.
+    * Additionally if there is an edge flag element then the SGVS can't be
+    * inserted past that so we need a dummy element to ensure that the edge
+    * flag is the last one.
+    */
+   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
+                              (brw->vs.prog_data->uses_instanceid &&
+                               uses_edge_flag));
+   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
@@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw)
                (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
   }

+   if (needs_sgvs_element) {
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(GEN6_VE0_VALID |
+                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      } else {
+         OUT_BATCH(GEN6_VE0_VALID);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      }
+   }
+
   if (gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@@ -206,25 +244,26 @@ gen8_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }
-
-   if (brw->vs.prog_data->uses_vertexid) {
-      OUT_BATCH(GEN6_VE0_VALID |
-                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
   ADVANCE_BATCH();

-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+      unsigned element_index;
+
+      /* The edge flag element is reordered to be the last one in the code
+       * above so we need to compensate for that in the element indices used
+       * below.
+       */
+      if (input == gen6_edgeflag_input)
+         element_index = nr_elements - 1;
+      else
+         element_index = j++;

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
+      OUT_BATCH(element_index |
+                (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
      OUT_BATCH(buffer->step_rate);
      ADVANCE_BATCH();
   }
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -569,43 +569,41 @@ intel_emit_linear_blit(struct brw_context *brw,
   int16_t src_x, dst_x;
   bool ok;

-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-			  src_x, 0, /* src x/y */
-			  dst_x, 0, /* dst x/y */
-			  pitch, height, /* w, h */
-			  GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and
+       * we want width to match pitch. Max width is (1 << 15 - 1),
+       * rounding that down to the nearest DWORD is 1 << 15 - 4
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;

-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);

-   if (size != 0) {
      ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-			     src_x, 0, /* src x/y */
-			     dst_x, 0, /* dst x/y */
-			     size, 1, /* w, h */
-			     GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }

 /**
--- a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
@@ -338,7 +338,6 @@ TAG(swtnl_init)(struct gl_context *ctx)
 			   NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
 	_tnl_need_projected_coords(ctx, GL_FALSE);
 	_tnl_allow_vertex_fog(ctx, GL_FALSE);
-	_tnl_wakeup(ctx);

 	swtnl_alloc_vertices(ctx);
 }
--- a/src/mesa/drivers/dri/nouveau/nv04_render.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_render.c
@@ -285,7 +285,6 @@ nv04_render_init(struct gl_context *ctx)
 	_tnl_init_vertices(ctx, tnl->vb.Size,
 			   NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
 	_tnl_allow_pixel_fog(ctx, GL_FALSE);
-	_tnl_wakeup(ctx);
 }

 void
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -354,14 +354,22 @@ _mesa_array_format_flip_channels(mesa_array_format format)
      return format;

   if (num_channels == 2) {
-      _mesa_array_format_set_swizzle(&format, swizzle[1], swizzle[0],
-                                     swizzle[2], swizzle[3]);
+      /* Assert that the swizzle makes sense for 2 channels */
+      for (unsigned i = 0; i < 4; i++)
+         assert(swizzle[i] != 2 && swizzle[i] != 3);
+
+      static const uint8_t flip_xy[6] = { 1, 0, 2, 3, 4, 5 };
+      _mesa_array_format_set_swizzle(&format,
+                                     flip_xy[swizzle[0]], flip_xy[swizzle[1]],
+                                     flip_xy[swizzle[2]], flip_xy[swizzle[3]]);
      return format;
   }

   if (num_channels == 4) {
-      _mesa_array_format_set_swizzle(&format, swizzle[3], swizzle[2],
-                                     swizzle[1], swizzle[0]);
+      static const uint8_t flip[6] = { 3, 2, 1, 0, 4, 5 };
+      _mesa_array_format_set_swizzle(&format,
+                                     flip[swizzle[0]], flip[swizzle[1]],
+                                     flip[swizzle[2]], flip[swizzle[3]]);
      return format;
   }

@@ -372,10 +380,11 @@ uint32_t
 _mesa_format_to_array_format(mesa_format format)
 {
   const struct gl_format_info *info = _mesa_get_format_info(format);
-   if (_mesa_little_endian())
-      return info->ArrayFormat;
-   else
+   if (info->ArrayFormat && !_mesa_little_endian() &&
+       info->Layout == MESA_FORMAT_LAYOUT_PACKED)
      return _mesa_array_format_flip_channels(info->ArrayFormat);
+   else
+      return info->ArrayFormat;
 }

 static struct hash_table *format_array_format_table;
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -191,6 +191,11 @@ static inline void
 _mesa_array_format_set_swizzle(mesa_array_format *f,
                               int32_t x, int32_t y, int32_t z, int32_t w)
 {
+   *f &= ~(MESA_ARRAY_FORMAT_SWIZZLE_X_MASK |
+           MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK |
+           MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK |
+           MESA_ARRAY_FORMAT_SWIZZLE_W_MASK);
+
   *f |= ((x << 8 ) & MESA_ARRAY_FORMAT_SWIZZLE_X_MASK) |
         ((y << 11) & MESA_ARRAY_FORMAT_SWIZZLE_Y_MASK) |
         ((z << 14) & MESA_ARRAY_FORMAT_SWIZZLE_Z_MASK) |
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -35,6 +35,7 @@
 #include "mtypes.h"
 #include "state.h"
 #include "texcompress.h"
+#include "texstate.h"
 #include "framebuffer.h"
 #include "samplerobj.h"
 #include "stencil.h"
@@ -1709,6 +1710,52 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params)
   }
 }

+/**
+ * Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D
+ * into the corresponding Mesa texture target index.
+ * \return TEXTURE_x_INDEX or -1 if binding is invalid
+ */
+static int
+tex_binding_to_index(const struct gl_context *ctx, GLenum binding)
+{
+   switch (binding) {
+   case GL_TEXTURE_BINDING_1D:
+      return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D:
+      return TEXTURE_2D_INDEX;
+   case GL_TEXTURE_BINDING_3D:
+      return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+      return ctx->Extensions.ARB_texture_cube_map
+         ? TEXTURE_CUBE_INDEX : -1;
+   case GL_TEXTURE_BINDING_RECTANGLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle
+         ? TEXTURE_RECT_INDEX : -1;
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array
+         ? TEXTURE_1D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+      return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array)
+         || _mesa_is_gles3(ctx)
+         ? TEXTURE_2D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_BUFFER:
+      return ctx->API == API_OPENGL_CORE &&
+             ctx->Extensions.ARB_texture_buffer_object ?
+             TEXTURE_BUFFER_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
+         ? TEXTURE_CUBE_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1;
+   default:
+      return -1;
+   }
+}
+
 static enum value_type
 find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
 {
@@ -1972,6 +2019,45 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
      v->value_int = ctx->ImageUnits[index].Format;
      return TYPE_INT;

+   /* ARB_direct_state_access */
+   case GL_TEXTURE_BINDING_1D:
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+   case GL_TEXTURE_BINDING_2D:
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_BINDING_3D:
+   case GL_TEXTURE_BINDING_BUFFER:
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_BINDING_RECTANGLE: {
+      int target;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      target = tex_binding_to_index(ctx, pname);
+      if (target < 0)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name;
+      return TYPE_INT;
+   }
+
+   case GL_SAMPLER_BINDING: {
+      struct gl_sampler_object *samp;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      samp = ctx->Texture.Unit[index].Sampler;
+      v->value_int = samp ? samp->Name : 0;
+      return TYPE_INT;
+   }
+
   case GL_MAX_COMPUTE_WORK_GROUP_COUNT:
      if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader)
         goto invalid_enum;
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2292,8 +2292,18 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         break;

      case GL_HALF_FLOAT:
-         if (internalFormat != GL_RG16F)
-            return GL_INVALID_OPERATION;
+      case GL_HALF_FLOAT_OES:
+         switch (internalFormat) {
+            case GL_RG16F:
+               break;
+            case GL_RG:
+               if (ctx->Extensions.ARB_texture_rg &&
+                   ctx->Extensions.OES_texture_half_float)
+                  break;
+            /* fallthrough */
+            default:
+               return GL_INVALID_OPERATION;
+         }
         break;

      case GL_FLOAT:
@@ -2301,6 +2311,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         case GL_RG16F:
         case GL_RG32F:
            break;
+         case GL_RG:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_float)
+               break;
+            /* fallthrough */
         default:
            return GL_INVALID_OPERATION;
         }
@@ -2361,8 +2376,19 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         break;

      case GL_HALF_FLOAT:
-         if (internalFormat != GL_R16F)
+      case GL_HALF_FLOAT_OES:
+         switch (internalFormat) {
+         case GL_R16F:
+            break;
+         case GL_RG:
+         case GL_RED:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_half_float)
+               break;
+            /* fallthrough */
+         default:
            return GL_INVALID_OPERATION;
+         }
         break;

      case GL_FLOAT:
@@ -2370,6 +2396,11 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
         case GL_R16F:
         case GL_R32F:
            break;
+         case GL_RED:
+            if (ctx->Extensions.ARB_texture_rg &&
+                ctx->Extensions.OES_texture_float)
+               break;
+            /* fallthrough */
         default:
            return GL_INVALID_OPERATION;
         }
@@ -2570,8 +2601,6 @@ get_swizzle_from_gl_format(GLenum format, uint8_t *swizzle)
 uint32_t
 _mesa_format_from_format_and_type(GLenum format, GLenum type)
 {
-   mesa_array_format array_format;
-
   bool is_array_format = true;
   uint8_t swizzle[4];
   bool normalized = false, is_float = false, is_signed = false;
@@ -2627,15 +2656,9 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
      normalized = !_mesa_is_enum_format_integer(format);
      num_channels = _mesa_components_in_format(format);

-      array_format =
-         MESA_ARRAY_FORMAT(type_size, is_signed, is_float,
-                           normalized, num_channels,
-                           swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
-
-      if (!_mesa_little_endian())
-         array_format = _mesa_array_format_flip_channels(array_format);
-
-      return array_format;
+      return MESA_ARRAY_FORMAT(type_size, is_signed, is_float,
+                               normalized, num_channels,
+                               swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
   }

   /* Otherwise this is not an array format, so return the mesa_format
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -537,7 +537,8 @@ read_rgba_pixels( struct gl_context *ctx,
       * convert to, then we can convert directly into the dst buffer and avoid
       * the final conversion/copy from the rgba buffer to the dst buffer.
       */
-      if (dst_format == rgba_format) {
+      if (dst_format == rgba_format &&
+          dst_stride == rgba_stride) {
         need_convert = false;
         rgba = dst;
      } else {
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -65,7 +65,7 @@ _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGB ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGB/GLubyte */
      GLubyte *tempImageSlices[1];
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -130,7 +130,7 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGB ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGB/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -187,7 +187,7 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -244,7 +244,7 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -300,7 +300,7 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -915,6 +915,13 @@ getteximage_error_check(struct gl_context *ctx,
                  "glGetTex%sImage(format=GL_STENCIL_INDEX)", suffix);
      return GL_TRUE;
   }
+   else if (_mesa_is_stencil_format(format)
+	    && !_mesa_is_depthstencil_format(baseFormat)
+	    && !_mesa_is_stencil_format(baseFormat)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetTex%sImage(format mismatch)", suffix);
+      return true;
+   }
   else if (_mesa_is_ycbcr_format(format)
            && !_mesa_is_ycbcr_format(baseFormat)) {
      _mesa_error(ctx, GL_INVALID_OPERATION,
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3682,12 +3682,12 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
      rowStride = _mesa_image_image_stride(&ctx->Unpack, width, height,
                                           format, type);
      /* Copy in each face. */
-      for (i = 0; i < 6; ++i) {
+      for (i = zoffset; i < zoffset + depth; ++i) {
         texImage = texObj->Image[i][level];
         assert(texImage);

         _mesa_texture_sub_image(ctx, 3, texObj, texImage, texObj->Target,
-                                 level, xoffset, yoffset, zoffset,
+                                 level, xoffset, yoffset, 0,
                                 width, height, 1, format,
                                 type, pixels, true);
         pixels = (GLubyte *) pixels + rowStride;
@@ -5537,10 +5537,13 @@ static GLboolean
 is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
 {
   /* Everything that is allowed for renderbuffers,
-    * except for a base format of GL_STENCIL_INDEX.
+    * except for a base format of GL_STENCIL_INDEX, unless supported.
    */
   GLenum baseFormat = _mesa_base_fbo_format(ctx, internalformat);
-   return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
+   if (ctx->Extensions.ARB_texture_stencil8)
+      return baseFormat != 0;
+   else
+      return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
 }


--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1888,6 +1888,12 @@ get_tex_parameterfv(struct gl_context *ctx,
         *params = (GLfloat) obj->Sampler.sRGBDecode;
         break;

+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = ENUM_TO_FLOAT(obj->Target);
+         break;
+
      default:
         goto invalid_pname;
   }
@@ -2113,6 +2119,12 @@ get_tex_parameteriv(struct gl_context *ctx,
         *params = obj->ImageFormatCompatibilityType;
         break;

+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = (GLint) obj->Target;
+         break;
+
      default:
         goto invalid_pname;
   }
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -189,6 +189,20 @@ clear_texture_fields(struct gl_context *ctx,
 }


+/**
+ * Update/re-validate framebuffer object.
+ */
+static void
+update_fbo_texture(struct gl_context *ctx, struct gl_texture_object *texObj)
+{
+   const unsigned numFaces = _mesa_num_tex_faces(texObj->Target);
+   for (int level = 0; level < ARRAY_SIZE(texObj->Image[0]); level++) {
+      for (unsigned face = 0; face < numFaces; face++)
+         _mesa_update_fbo_texture(ctx, texObj, face, level);
+   }
+}
+
+
 GLboolean
 _mesa_is_legal_tex_storage_format(struct gl_context *ctx, GLenum internalformat)
 {
@@ -445,6 +459,7 @@ _mesa_texture_storage(struct gl_context *ctx, GLuint dims,

      _mesa_set_texture_view_state(ctx, texObj, target, levels);

+      update_fbo_texture(ctx, texObj);
   }
 }

--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -1004,6 +1004,7 @@ store_texsubimage(struct gl_context *ctx,
   /* compute slice info (and do some sanity checks) */
   switch (target) {
   case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_EXTERNAL_OES:
@@ -1025,6 +1026,7 @@ store_texsubimage(struct gl_context *ctx,
      srcImageStride = _mesa_image_row_stride(packing, width, format, type);
      break;
   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      numSlices = depth;
      sliceOffset = zoffset;
      depth = 1;
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -312,24 +312,31 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,

      return;
   }
+   if ((uni->type->base_type == GLSL_TYPE_DOUBLE &&
+        returnType != GLSL_TYPE_DOUBLE) ||
+       (uni->type->base_type != GLSL_TYPE_DOUBLE &&
+        returnType == GLSL_TYPE_DOUBLE)) {
+	 _mesa_error( ctx, GL_INVALID_OPERATION,
+	             "glGetnUniform*vARB(incompatible uniform types)");
+	return;
+   }

   {
      unsigned elements = (uni->type->is_sampler())
 	 ? 1 : uni->type->components();
+      const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;

      /* Calculate the source base address *BEFORE* modifying elements to
       * account for the size of the user's buffer.
       */
      const union gl_constant_value *const src =
-	 &uni->storage[offset * elements];
+	 &uni->storage[offset * elements * dmul];

      assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT ||
-             returnType == GLSL_TYPE_UINT);
-      /* The three (currently) supported types all have the same size,
-       * which is of course the same as their union. That'll change
-       * with glGetUniformdv()...
-       */
-      unsigned bytes = sizeof(src[0]) * elements;
+             returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
+
+      /* doubles have a different size than the other 3 types */
+      unsigned bytes = sizeof(src[0]) * elements * dmul;
      if (bufSize < 0 || bytes > (unsigned) bufSize) {
 	 _mesa_error( ctx, GL_INVALID_OPERATION,
 	             "glGetnUniform*vARB(out of bounds: bufSize is %d,"
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -888,16 +888,7 @@ _mesa_GetnUniformdvARB(GLuint program, GLint location,
 {
   GET_CURRENT_CONTEXT(ctx);

-   (void) program;
-   (void) location;
-   (void) bufSize;
-   (void) params;
-
-   /*
   _mesa_get_uniform(ctx, program, location, bufSize, GLSL_TYPE_DOUBLE, params);
-   */
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformdvARB"
-               "(GL_ARB_gpu_shader_fp64 not implemented)");
 }

 void GLAPIENTRY
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2963,6 +2963,8 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
   if (prog->LinkStatus) {
      if (!ctx->Driver.LinkShader(ctx, prog)) {
 	 prog->LinkStatus = GL_FALSE;
+      } else {
+         build_program_resource_list(ctx, prog);
      }
   }

--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -210,9 +210,9 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
      GLuint row;

      for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         GLvoid *dest = _mesa_image_address2d(pack, pixels,
                                              width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
         memcpy(dest, map, bytesPerRow);
         map += tex_xfer->stride;
      }
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2679,7 +2679,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
       */
      glsl_to_tgsi_instruction *inst, *new_inst;
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
-      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
      new_inst->saturate = inst->saturate;
      inst->dead_mask = inst->dst[0].writemask;
   } else {
@@ -4240,12 +4240,12 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
    * new visitor. */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
      glsl_to_tgsi_instruction *newinst;
-      st_src_reg src_regs[3];
+      st_src_reg src_regs[4];

      if (inst->dst[0].file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);

-      for (int i = 0; i < 3; i++) {
+      for (int i = 0; i < 4; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT &&
             src_regs[i].index == VARYING_SLOT_COL0) {
@@ -4256,7 +4256,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

-      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
      newinst->tex_target = inst->tex_target;
      newinst->sampler_array_size = inst->sampler_array_size;
   }
@@ -4325,18 +4325,18 @@ get_bitmap_visitor(struct st_fragment_program *fp,
    * new visitor. */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
      glsl_to_tgsi_instruction *newinst;
-      st_src_reg src_regs[3];
+      st_src_reg src_regs[4];

      if (inst->dst[0].file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);

-      for (int i = 0; i < 3; i++) {
+      for (int i = 0; i < 4; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

-      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
+      newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
      newinst->tex_target = inst->tex_target;
      newinst->sampler_array_size = inst->sampler_array_size;
   }
@@ -1 +1 @@
 .6.4
 .6.6