Add release notes for the 10.5.7 release

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 10.5.7
2015-06-07 11:13:19 +01:00 · 2015-06-07 11:11:38 +01:00 · 2015-06-03 12:28:58 +01:00 · 2015-06-03 12:06:08 +01:00 · 2015-06-03 12:06:01 +01:00 · 2015-06-03 12:05:43 +01:00
56 changed files with 663 additions and 171 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-10.5.5
+10.5.7
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -24,3 +24,6 @@ fe5fddd7e2df74233a2a02ae021418485f39d11c

 # The optimisations mentioned are not available in 10.5
 627c68308683abbd6e563a09af6013a33938a790 i965/fs: in MAD optimizations, switch last argument to be immediate
+
+# 10.5 has the compat string implementation, which includes the <string>
+967825d053f71c5f5fc3ba31eabc0c6004fde4f1 clover: Build fix for FreeBSD.
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*10\.5.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -609,6 +609,7 @@ if test "x$enable_asm" = xyes; then
 fi

 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
+AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])

 dnl Check to see if dlopen is in default libraries (like Solaris, which
--- a/docs/relnotes/10.5.5.html
+++ b/docs/relnotes/10.5.5.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+c10f00fd792b8290dd51ebcc48a9016c4cafab19ec205423c6fcadfd7f3a59f2  mesa-10.5.5.tar.gz
+4ac4e4ea3414f1cadb1467f2f173f9e56170d31e8674f7953a46f0549d319f28  mesa-10.5.5.tar.xz
 </pre>


--- a/docs/relnotes/10.5.6.html
+++ b/docs/relnotes/10.5.6.html
@@ -0,0 +1,147 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.6 Release Notes / May 23, 2015</h1>
+
+<p>
+Mesa 10.5.6 is a bug fix release which fixes bugs found since the 10.5.5 release.
+</p>
+<p>
+Mesa 10.5.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+89ff9cb08d0f6e3f34154864c3071253057cd21020759457c8ae27e0f70985d3  mesa-10.5.6.tar.gz
+66017853bde5f7a6647db3eede30512a091a3491daa1708e0ad8027c328ba595  mesa-10.5.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86792">Bug 86792</a> - [NVC0] Portal 2 Crashes in Wine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90147">Bug 90147</a> - swrast: build error undeclared _SC_PHYS_PAGES on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90350">Bug 90350</a> - [G96] Portal's portal are incorrectly rendered</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90363">Bug 90363</a> - [nv50] HW state is not reset correctly when using a new GL context</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new bonaire pci id</li>
+</ul>
+
+<p>Axel Davy (2):</p>
+<ul>
+  <li>egl/wayland: properly destroy wayland objects</li>
+  <li>glx/dri3: Add additional check for gpu offloading case</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.5 release</li>
+  <li>egl/main: fix EGL_KHR_get_all_proc_addresses</li>
+  <li>targets/osmesa: drop the -module tag from LDFLAGS</li>
+  <li>Update version to 10.5.6</li>
+</ul>
+
+<p>Francisco Jerez (4):</p>
+<ul>
+  <li>clover: Refactor event::trigger and ::abort to prevent deadlock and reentrancy issues.</li>
+  <li>clover: Wrap event::_status in a method to prevent unlocked access.</li>
+  <li>clover: Implement locking of the wait_count, _chain and _status members of event.</li>
+  <li>i965: Fix PBO cache coherency issue after _mesa_meta_pbo_GetTexSubImage().</li>
+</ul>
+
+<p>Fredrik Höglund (2):</p>
+<ul>
+  <li>main: Require that the texture exists in framebuffer_texture</li>
+  <li>mesa: Generate GL_INVALID_VALUE in framebuffer_texture when layer &lt; 0</li>
+</ul>
+
+<p>Ilia Mirkin (7):</p>
+<ul>
+  <li>nv50/ir: only propagate saturate up if some actual folding took place</li>
+  <li>nv50: keep track of PGRAPH state in nv50_screen</li>
+  <li>nvc0: keep track of PGRAPH state in nvc0_screen</li>
+  <li>nvc0: reset the instanced elements state when doing blit using 3d engine</li>
+  <li>nv50/ir: only enable mul saturate on G200+</li>
+  <li>st/mesa: make sure to create a "clean" bool when doing i2b</li>
+  <li>nvc0: switch mechanism for shader eviction to be a while loop</li>
+</ul>
+
+<p>Jeremy Huddleston Sequoia (2):</p>
+<ul>
+  <li>swrast: Build fix for darwin</li>
+  <li>darwin: Fix install name of libOSMesa</li>
+</ul>
+
+<p>Laura Ekstrand (2):</p>
+<ul>
+  <li>main: Fix an error generated by FramebufferTexture</li>
+  <li>main: Complete error conditions for glInvalidate*Framebuffer.</li>
+</ul>
+
+<p>Marta Lofstedt (1):</p>
+<ul>
+  <li>main: glGetIntegeri_v fails for GL_VERTEX_BINDING_STRIDE</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>freedreno: enable a306</li>
+  <li>freedreno: fix bug in tile/slot calculation</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>draw: (trivial) fix out-of-bounds vector initialization</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>mesa: fix shininess check for ffvertex_prog v2</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>clover: Add a mutex to guard queue::queued_events</li>
+  <li>clover: Fix a bug with multi-threaded events v2</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.5.7.html
+++ b/docs/relnotes/10.5.7.html
@@ -0,0 +1,102 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.7 Release Notes / June 07, 2015</h1>
+
+<p>
+Mesa 10.5.7 is a bug fix release which fixes bugs found since the 10.5.6 release.
+</p>
+<p>
+Mesa 10.5.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965: Emit 3DSTATE_MULTISAMPLE before WM_HZ_OP (gen8+)</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: Add sha256sums for the 10.5.6 release</li>
+  <li>get-pick-list.sh: Require explicit "10.5" for nominating stable patches</li>
+  <li>cherry-ignore: add clover build fix not applicable for 10.5</li>
+  <li>Update version to 10.5.7</li>
+</ul>
+
+<p>Ilia Mirkin (18):</p>
+<ul>
+  <li>nvc0/ir: set ftz when sources are floats, not just destinations</li>
+  <li>nv50/ir: guess that the constant offset is the starting slot of array</li>
+  <li>nvc0/ir: LOAD's can't be used for shader inputs</li>
+  <li>nvc0: a geometry shader can have up to 1024 vertices output</li>
+  <li>nv50/ir: avoid messing up arg1 of PFETCH</li>
+  <li>nv30: don't leak fragprog consts</li>
+  <li>nv30: avoid leaking render state and draw shaders</li>
+  <li>nv30: fix clip plane uploads and enable changes</li>
+  <li>nv30/draw: avoid leaving stale pointers in draw state</li>
+  <li>nv30/draw: draw expects constbuf size in bytes, not vec4 units</li>
+  <li>st/mesa: don't leak glsl_to_tgsi object on link failure</li>
+  <li>glsl: avoid leaking linked gl_shader when there's a late linker error</li>
+  <li>nv30/draw: fix indexed draws with swtnl path and a resource index buffer</li>
+  <li>nv30/draw: only use the DMA1 object (GART) if the bo is not in VRAM</li>
+  <li>nv30/draw: allocate vertex buffers in gart</li>
+  <li>nv30/draw: switch varying hookup logic to know about texcoords</li>
+  <li>nv30: falling back to draw path for edgeflag does no good</li>
+  <li>nv30: avoid doing extra work on clear and hitting unexpected states</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Fix implied_mrf_writes for scratch writes</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/dri: fix postprocessing crash when there's no depth buffer</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -85,6 +85,7 @@ CHIPSET(0x6651, BONAIRE_6651, BONAIRE)
 CHIPSET(0x6658, BONAIRE_6658, BONAIRE)
 CHIPSET(0x665C, BONAIRE_665C, BONAIRE)
 CHIPSET(0x665D, BONAIRE_665D, BONAIRE)
+CHIPSET(0x665F, BONAIRE_665F, BONAIRE)

 CHIPSET(0x9830, KABINI_9830, KABINI)
 CHIPSET(0x9831, KABINI_9831, KABINI)
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -703,6 +703,8 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
 #ifdef HAVE_WAYLAND_PLATFORM
   case _EGL_PLATFORM_WAYLAND:
      wl_drm_destroy(dri2_dpy->wl_drm);
+      wl_registry_destroy(dri2_dpy->wl_registry);
+      wl_event_queue_destroy(dri2_dpy->wl_queue);
      if (dri2_dpy->own_device) {
         wl_display_disconnect(dri2_dpy->wl_dpy);
      }
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1028,7 +1028,7 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)
   wl_registry_add_listener(dri2_dpy->wl_registry,
                            &registry_listener, dri2_dpy);
   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_drm == NULL)
-      goto cleanup_dpy;
+      goto cleanup_registry;

   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->fd == -1)
      goto cleanup_drm;
@@ -1111,6 +1111,9 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)
 cleanup_drm:
   free(dri2_dpy->device_name);
   wl_drm_destroy(dri2_dpy->wl_drm);
+ cleanup_registry:
+   wl_registry_destroy(dri2_dpy->wl_registry);
+   wl_event_queue_destroy(dri2_dpy->wl_queue);
 cleanup_dpy:
   free(dri2_dpy);
   
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1029,8 +1029,9 @@ eglGetProcAddress(const char *procname)
      const char *name;
      _EGLProc function;
   } egl_functions[] = {
-      /* core functions should not be queryable, but, well... */
-#ifdef _EGL_GET_CORE_ADDRESSES
+      /* core functions queryable in the presence of
+       * EGL_KHR_get_all_proc_addresses or EGL 1.5
+       */
      /* alphabetical order */
      { "eglBindAPI", (_EGLProc) eglBindAPI },
      { "eglBindTexImage", (_EGLProc) eglBindTexImage },
@@ -1066,7 +1067,6 @@ eglGetProcAddress(const char *procname)
      { "eglWaitClient", (_EGLProc) eglWaitClient },
      { "eglWaitGL", (_EGLProc) eglWaitGL },
      { "eglWaitNative", (_EGLProc) eglWaitNative },
-#endif /* _EGL_GET_CORE_ADDRESSES */
 #ifdef EGL_MESA_screen_surface
      { "eglChooseModeMESA", (_EGLProc) eglChooseModeMESA },
      { "eglGetModesMESA", (_EGLProc) eglGetModesMESA },
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -2049,7 +2049,7 @@ generate_mask_value(struct draw_gs_llvm_variant *variant,

   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
                                  variant->num_prims);
-   for (i = 0; i <= gs_type.length; i++) {
+   for (i = 0; i < gs_type.length; i++) {
      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
      mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
   }
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -293,7 +293,7 @@ struct fd_context {
 	 */
 	struct fd_gmem_stateobj gmem;
 	struct fd_vsc_pipe      pipe[8];
-	struct fd_tile          tile[64];
+	struct fd_tile          tile[256];

 	/* which state objects need to be re-emit'd: */
 	enum {
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -91,6 +91,7 @@ calculate_tiles(struct fd_context *ctx)
 	uint32_t i, j, t, xoff, yoff;
 	uint32_t tpp_x, tpp_y;
 	bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
+	int tile_n[ARRAY_SIZE(ctx->pipe)];

 	if (pfb->cbufs[0])
 		cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
@@ -213,6 +214,7 @@ calculate_tiles(struct fd_context *ctx)
 	/* configure tiles: */
 	t = 0;
 	yoff = miny;
+	memset(tile_n, 0, sizeof(tile_n));
 	for (i = 0; i < nbins_y; i++) {
 		uint32_t bw, bh;

@@ -223,20 +225,17 @@ calculate_tiles(struct fd_context *ctx)

 		for (j = 0; j < nbins_x; j++) {
 			struct fd_tile *tile = &ctx->tile[t];
-			uint32_t n, p;
+			uint32_t p;

 			assert(t < ARRAY_SIZE(ctx->tile));

 			/* pipe number: */
 			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);

-			/* slot number: */
-			n = ((i % tpp_y) * tpp_x) + (j % tpp_x);
-
 			/* clip bin width: */
 			bw = MIN2(bin_w, minx + width - xoff);

-			tile->n = n;
+			tile->n = tile_n[p]++;
 			tile->p = p;
 			tile->bin_w = bw;
 			tile->bin_h = bh;
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -531,6 +531,7 @@ fd_screen_create(struct fd_device *dev)
 	case 220:
 		fd2_screen_init(pscreen);
 		break;
+	case 307:
 	case 320:
 	case 330:
 		fd3_screen_init(pscreen);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1270,18 +1270,20 @@ Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
 {
   const int swz = src.getSwizzle(c);

+   /* TODO: Use Array ID when it's available for the index */
   return makeSym(src.getFile(),
                  src.is2D() ? src.getIndex(1) : 0,
-                  src.isIndirect(0) ? -1 : src.getIndex(0), swz,
+                  src.getIndex(0), swz,
                  src.getIndex(0) * 16 + swz * 4);
 }

 Symbol *
 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
 {
+   /* TODO: Use Array ID when it's available for the index */
   return makeSym(dst.getFile(),
                  dst.is2D() ? dst.getIndex(1) : 0,
-                  dst.isIndirect(0) ? -1 : dst.getIndex(0), c,
+                  dst.getIndex(0), c,
                  dst.getIndex(0) * 16 + c * 4);
 }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -240,6 +240,7 @@ GM107LoweringPass::visit(Instruction *i)
            Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                    i->getIndirect(0, 0), bld.mkImm(4));
            i->setIndirect(0, 0, ptr);
+            i->op = OP_VFETCH;
         } else {
            i->op = OP_VFETCH;
            assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -77,8 +77,7 @@ void
 NVC0LegalizeSSA::handleFTZ(Instruction *i)
 {
   // Only want to flush float inputs
-   if (i->sType != TYPE_F32)
-      return;
+   assert(i->sType == TYPE_F32);

   // If we're already flushing denorms (and NaN's) to zero, no need for this.
   if (i->dnz)
@@ -106,7 +105,7 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
   Instruction *next;
   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next;
-      if (i->dType == TYPE_F32) {
+      if (i->sType == TYPE_F32) {
         if (prog->getType() != Program::TYPE_COMPUTE)
            handleFTZ(i);
         continue;
@@ -1715,6 +1714,7 @@ NVC0LoweringPass::visit(Instruction *i)
            Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                    i->getIndirect(0, 0), bld.mkImm(4));
            i->setIndirect(0, 0, ptr);
+            i->op = OP_VFETCH;
         } else {
            i->op = OP_VFETCH;
            assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -207,6 +207,9 @@ LoadPropagation::visit(BasicBlock *bb)
      if (i->op == OP_CALL) // calls have args as sources, they must be in regs
         continue;

+      if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg
+         continue;
+
      if (i->srcExists(1))
         checkSwapSrc01(i);

@@ -545,6 +548,11 @@ ConstantFolding::expr(Instruction *i,
   case OP_POPCNT:
      res.data.u32 = util_bitcount(a->data.u32 & b->data.u32);
      break;
+   case OP_PFETCH:
+      // The two arguments to pfetch are logically added together. Normally
+      // the second argument will not be constant, but that can happen.
+      res.data.u32 = a->data.u32 + b->data.u32;
+      break;
   default:
      return;
   }
@@ -559,7 +567,9 @@ ConstantFolding::expr(Instruction *i,

   i->getSrc(0)->reg.data = res.data;

-   if (i->op == OP_MAD || i->op == OP_FMA) {
+   switch (i->op) {
+   case OP_MAD:
+   case OP_FMA: {
      i->op = OP_ADD;

      i->setSrc(1, i->getSrc(0));
@@ -574,8 +584,14 @@ ConstantFolding::expr(Instruction *i,
         bld.setPosition(i, false);
         i->setSrc(1, bld.loadImm(NULL, res.data.u32));
      }
-   } else {
+      break;
+   }
+   case OP_PFETCH:
+      // Leave PFETCH alone... we just folded its 2 args into 1.
+      break;
+   default:
      i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
+      break;
   }
   i->subOp = 0;
 }
@@ -676,6 +692,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
            mul1->src(s1).mod = Modifier(0);
            mul2->def(0).replace(mul1->getDef(0), false);
+            mul1->saturate = mul2->saturate;
         } else
         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
            // c = mul a, b
@@ -684,8 +701,8 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
            mul2->def(0).replace(mul1->getDef(0), false);
            if (f < 0)
               mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
+            mul1->saturate = mul2->saturate;
         }
-         mul1->saturate = mul2->saturate;
         return;
      }
   }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -84,7 +84,7 @@ static const struct opProperties _initProps[] =
   //           neg  abs  not  sat  c[]  s[], a[], imm
   { OP_ADD,    0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
   { OP_SUB,    0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
-   { OP_MUL,    0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
+   { OP_MUL,    0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
   { OP_MAX,    0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
   { OP_MIN,    0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
   { OP_MAD,    0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint
@@ -188,6 +188,9 @@ void TargetNV50::initOpInfo()
      if (prop->mSat & 8)
         opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
   }
+
+   if (chipset >= 0xa0)
+      opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT;
 }

 unsigned int
--- a/src/gallium/drivers/nouveau/nv30/nv30_clear.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_clear.c
@@ -58,7 +58,7 @@ nv30_clear(struct pipe_context *pipe, unsigned buffers,
   struct pipe_framebuffer_state *fb = &nv30->framebuffer;
   uint32_t colr = 0, zeta = 0, mode = 0;

-   if (!nv30_state_validate(nv30, TRUE))
+   if (!nv30_state_validate(nv30, NV30_NEW_FRAMEBUFFER | NV30_NEW_SCISSOR, TRUE))
      return;

   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
--- a/src/gallium/drivers/nouveau/nv30/nv30_context.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_context.h
@@ -204,7 +204,7 @@ void
 nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info);

 boolean
-nv30_state_validate(struct nv30_context *nv30, boolean hwtnl);
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl);

 void
 nv30_state_release(struct nv30_context *nv30);
--- a/src/gallium/drivers/nouveau/nv30/nv30_draw.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_draw.c
@@ -71,12 +71,12 @@ nv30_render_allocate_vertices(struct vbuf_render *render,
   struct nv30_render *r = nv30_render(render);
   struct nv30_context *nv30 = r->nv30;

-   r->length = vertex_size * nr_vertices;
+   r->length = (uint32_t)vertex_size * (uint32_t)nr_vertices;

   if (r->offset + r->length >= render->max_vertex_buffer_bytes) {
      pipe_resource_reference(&r->buffer, NULL);
      r->buffer = pipe_buffer_create(&nv30->screen->base.base,
-                                     PIPE_BIND_VERTEX_BUFFER, 0,
+                                     PIPE_BIND_VERTEX_BUFFER, PIPE_USAGE_STREAM,
                                     render->max_vertex_buffer_bytes);
      if (!r->buffer)
         return FALSE;
@@ -91,10 +91,14 @@ static void *
 nv30_render_map_vertices(struct vbuf_render *render)
 {
   struct nv30_render *r = nv30_render(render);
-   char *map = pipe_buffer_map(&r->nv30->base.pipe, r->buffer,
-                               PIPE_TRANSFER_WRITE |
-                               PIPE_TRANSFER_UNSYNCHRONIZED, &r->transfer);
-   return map + r->offset;
+   char *map = pipe_buffer_map_range(
+         &r->nv30->base.pipe, r->buffer,
+         r->offset, r->length,
+         PIPE_TRANSFER_WRITE |
+         PIPE_TRANSFER_DISCARD_RANGE,
+         &r->transfer);
+   assert(map);
+   return map;
 }

 static void
@@ -103,6 +107,7 @@ nv30_render_unmap_vertices(struct vbuf_render *render,
 {
   struct nv30_render *r = nv30_render(render);
   pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer);
+   r->transfer = NULL;
 }

 static void
@@ -126,10 +131,10 @@ nv30_render_draw_elements(struct vbuf_render *render,
   for (i = 0; i < r->vertex_info.num_attribs; i++) {
      PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
                       nv04_resource(r->buffer), r->offset + r->vtxptr[i],
-                       NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
+                       NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
   }

-   if (!nv30_state_validate(nv30, FALSE))
+   if (!nv30_state_validate(nv30, ~0, FALSE))
      return;

   BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -171,10 +176,10 @@ nv30_render_draw_arrays(struct vbuf_render *render, unsigned start, uint nr)
   for (i = 0; i < r->vertex_info.num_attribs; i++) {
      PUSH_RESRC(push, NV30_3D(VTXBUF(i)), BUFCTX_VTXTMP,
                       nv04_resource(r->buffer), r->offset + r->vtxptr[i],
-                       NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, 0);
+                       NOUVEAU_BO_LOW | NOUVEAU_BO_RD, 0, NV30_3D_VTXBUF_DMA1);
   }

-   if (!nv30_state_validate(nv30, FALSE))
+   if (!nv30_state_validate(nv30, ~0, FALSE))
      return;

   BEGIN_NV04(push, NV30_3D(VERTEX_BEGIN_END), 1);
@@ -213,22 +218,24 @@ static const struct {
   [TGSI_SEMANTIC_BCOLOR  ] = { EMIT_4F, INTERP_LINEAR     , 1, 3, 0x00000004 },
   [TGSI_SEMANTIC_FOG     ] = { EMIT_4F, INTERP_PERSPECTIVE, 5, 5, 0x00000010 },
   [TGSI_SEMANTIC_PSIZE   ] = { EMIT_1F_PSIZE, INTERP_POS  , 6, 6, 0x00000020 },
-   [TGSI_SEMANTIC_GENERIC ] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 }
+   [TGSI_SEMANTIC_TEXCOORD] = { EMIT_4F, INTERP_PERSPECTIVE, 8, 7, 0x00004000 },
 };

 static boolean
 vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
 {
-   struct pipe_screen *pscreen = &r->nv30->screen->base.base;
+   struct nv30_screen *screen = r->nv30->screen;
   struct nv30_fragprog *fp = r->nv30->fragprog.program;
   struct vertex_info *vinfo = &r->vertex_info;
   enum pipe_format format;
   uint emit = EMIT_OMIT;
   uint result = *idx;

-   if (sem == TGSI_SEMANTIC_GENERIC && result >= 8) {
-      for (result = 0; result < 8; result++) {
-         if (fp->texcoord[result] == *idx) {
+   if (sem == TGSI_SEMANTIC_GENERIC) {
+      uint num_texcoords = (screen->eng3d->oclass < NV40_3D_CLASS) ? 8 : 10;
+      for (result = 0; result < num_texcoords; result++) {
+         if (fp->texcoord[result] == *idx + 8) {
+            sem = TGSI_SEMANTIC_TEXCOORD;
            emit = vroute[sem].emit;
            break;
         }
@@ -243,11 +250,11 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
   draw_emit_vertex_attr(vinfo, emit, vroute[sem].interp, attrib);
   format = draw_translate_vinfo_format(emit);

-   r->vtxfmt[attrib] = nv30_vtxfmt(pscreen, format)->hw;
-   r->vtxptr[attrib] = vinfo->size | NV30_3D_VTXBUF_DMA1;
+   r->vtxfmt[attrib] = nv30_vtxfmt(&screen->base.base, format)->hw;
+   r->vtxptr[attrib] = vinfo->size;
   vinfo->size += draw_translate_vinfo_size(emit);

-   if (nv30_screen(pscreen)->eng3d->oclass < NV40_3D_CLASS) {
+   if (screen->eng3d->oclass < NV40_3D_CLASS) {
      r->vtxprog[attrib][0] = 0x001f38d8;
      r->vtxprog[attrib][1] = 0x0080001b | (attrib << 9);
      r->vtxprog[attrib][2] = 0x0836106c;
@@ -259,7 +266,12 @@ vroute_add(struct nv30_render *r, uint attrib, uint sem, uint *idx)
      r->vtxprog[attrib][3] = 0x6041ff80 | (result + vroute[sem].vp40) << 2;
   }

-   *idx = vroute[sem].ow40 << result;
+   if (result < 8)
+      *idx = vroute[sem].ow40 << result;
+   else {
+      assert(sem == TGSI_SEMANTIC_TEXCOORD);
+      *idx = 0x00001000 << (result - 8);
+   }
   return TRUE;
 }

@@ -313,7 +325,7 @@ nv30_render_validate(struct nv30_context *nv30)

   while (pntc && attrib < 16) {
      uint index = ffs(pntc) - 1; pntc &= ~(1 << index);
-      if (vroute_add(r, attrib, TGSI_SEMANTIC_GENERIC, &index)) {
+      if (vroute_add(r, attrib, TGSI_SEMANTIC_TEXCOORD, &index)) {
         vp_attribs |= (1 << attrib++);
         vp_results |= index;
      }
@@ -398,17 +410,17 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
      if (nv30->vertprog.constbuf) {
         void *map = nv04_resource(nv30->vertprog.constbuf)->data;
         draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0,
-                                         map, nv30->vertprog.constbuf_nr);
+                                         map, nv30->vertprog.constbuf_nr * 16);
+      } else {
+         draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX, 0, NULL, 0);
      }
   }

   for (i = 0; i < nv30->num_vtxbufs; i++) {
      const void *map = nv30->vtxbuf[i].user_buffer;
      if (!map) {
-         if (!nv30->vtxbuf[i].buffer) {
-            continue;
-         }
-         map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer,
+         if (nv30->vtxbuf[i].buffer)
+            map = pipe_buffer_map(pipe, nv30->vtxbuf[i].buffer,
                                  PIPE_TRANSFER_UNSYNCHRONIZED |
                                  PIPE_TRANSFER_READ, &transfer[i]);
      }
@@ -418,9 +430,9 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
   if (info->indexed) {
      const void *map = nv30->idxbuf.user_buffer;
      if (!map)
-         pipe_buffer_map(pipe, nv30->idxbuf.buffer,
-                                  PIPE_TRANSFER_UNSYNCHRONIZED |
-                                  PIPE_TRANSFER_READ, &transferi);
+         map = pipe_buffer_map(pipe, nv30->idxbuf.buffer,
+                               PIPE_TRANSFER_UNSYNCHRONIZED |
+                               PIPE_TRANSFER_READ, &transferi);
      draw_set_indexes(draw,
                       (ubyte *) map + nv30->idxbuf.offset,
                       nv30->idxbuf.index_size, ~0);
@@ -444,6 +456,12 @@ nv30_render_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 static void
 nv30_render_destroy(struct vbuf_render *render)
 {
+   struct nv30_render *r = nv30_render(render);
+
+   if (r->transfer)
+      pipe_buffer_unmap(&r->nv30->base.pipe, r->transfer);
+   pipe_resource_reference(&r->buffer, NULL);
+   nouveau_heap_free(&r->vertprog);
   FREE(render);
 }

--- a/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_fragprog.c
@@ -23,6 +23,7 @@
 *
 */

+#include "draw/draw_context.h"
 #include "tgsi/tgsi_parse.h"

 #include "nv_object.xml.h"
@@ -147,8 +148,12 @@ nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso)

   pipe_resource_reference(&fp->buffer, NULL);

+   if (fp->draw)
+      draw_delete_fragment_shader(nv30_context(pipe)->draw, fp->draw);
+
   FREE((void *)fp->pipe.tokens);
   FREE(fp->insn);
+   FREE(fp->consts);
   FREE(fp);
 }

--- a/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state_validate.c
@@ -272,15 +272,13 @@ nv30_validate_clip(struct nv30_context *nv30)
   uint32_t clpd_enable = 0;

   for (i = 0; i < 6; i++) {
-      if (nv30->rast->pipe.clip_plane_enable & (1 << i)) {
-         if (nv30->dirty & NV30_NEW_CLIP) {
-            BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
-            PUSH_DATA (push, i);
-            PUSH_DATAp(push, nv30->clip.ucp[i], 4);
-         }
-
-         clpd_enable |= 1 << (1 + 4*i);
+      if (nv30->dirty & NV30_NEW_CLIP) {
+         BEGIN_NV04(push, NV30_3D(VP_UPLOAD_CONST_ID), 5);
+         PUSH_DATA (push, i);
+         PUSH_DATAp(push, nv30->clip.ucp[i], 4);
      }
+      if (nv30->rast->pipe.clip_plane_enable & (1 << i))
+         clpd_enable |= 2 << (4*i);
   }

   BEGIN_NV04(push, NV30_3D(VP_CLIP_PLANES_ENABLE), 1);
@@ -389,7 +387,7 @@ static struct state_validate hwtnl_validate_list[] = {
    { nv30_validate_stipple,       NV30_NEW_STIPPLE },
    { nv30_validate_scissor,       NV30_NEW_SCISSOR | NV30_NEW_RASTERIZER },
    { nv30_validate_viewport,      NV30_NEW_VIEWPORT },
-    { nv30_validate_clip,          NV30_NEW_CLIP },
+    { nv30_validate_clip,          NV30_NEW_CLIP | NV30_NEW_RASTERIZER },
    { nv30_fragprog_validate,      NV30_NEW_FRAGPROG | NV30_NEW_FRAGCONST },
    { nv30_vertprog_validate,      NV30_NEW_VERTPROG | NV30_NEW_VERTCONST |
                                   NV30_NEW_FRAGPROG | NV30_NEW_RASTERIZER },
@@ -456,7 +454,7 @@ nv30_state_context_switch(struct nv30_context *nv30)
 }

 boolean
-nv30_state_validate(struct nv30_context *nv30, boolean hwtnl)
+nv30_state_validate(struct nv30_context *nv30, uint32_t mask, boolean hwtnl)
 {
   struct nouveau_screen *screen = &nv30->screen->base;
   struct nouveau_pushbuf *push = nv30->base.pushbuf;
@@ -481,14 +479,16 @@ nv30_state_validate(struct nv30_context *nv30, boolean hwtnl)
   else
      validate = swtnl_validate_list;

-   if (nv30->dirty) {
+   mask &= nv30->dirty;
+
+   if (mask) {
      while (validate->func) {
-         if (nv30->dirty & validate->mask)
+         if (mask & validate->mask)
            validate->func(nv30);
         validate++;
      }

-      nv30->dirty = 0;
+      nv30->dirty &= ~mask;
   }

   nouveau_pushbuf_bufctx(push, bctx);
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -564,7 +564,7 @@ nv30_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
   if (nv30->vbo_user && !(nv30->dirty & (NV30_NEW_VERTEX | NV30_NEW_ARRAYS)))
      nv30_update_user_vbufs(nv30);

-   nv30_state_validate(nv30, TRUE);
+   nv30_state_validate(nv30, ~0, TRUE);
   if (nv30->draw_flags) {
      nv30_render_vbo(pipe, info);
      return;
--- a/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vertprog.c
@@ -23,6 +23,7 @@
 *
 */

+#include "draw/draw_context.h"
 #include "util/u_dynarray.h"
 #include "tgsi/tgsi_parse.h"

@@ -237,6 +238,10 @@ nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso)

   if (vp->translated)
      nv30_vertprog_destroy(vp);
+
+   if (vp->draw)
+      draw_delete_vertex_shader(nv30_context(pipe)->draw, vp->draw);
+
   FREE((void *)vp->pipe.tokens);
   FREE(vp);
 }
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -873,9 +873,8 @@ nvfx_vertprog_parse_decl_output(struct nvfx_vpc *vpc,
      }
      break;
   case TGSI_SEMANTIC_EDGEFLAG:
-      /* not really an error just a fallback */
-      NOUVEAU_ERR("cannot handle edgeflag output\n");
-      return FALSE;
+      vpc->r_result[idx] = nvfx_reg(NVFXSR_NONE, 0);
+      return TRUE;
   default:
      NOUVEAU_ERR("bad output semantic\n");
      return FALSE;
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -138,8 +138,11 @@ nv50_destroy(struct pipe_context *pipe)
 {
   struct nv50_context *nv50 = nv50_context(pipe);

-   if (nv50_context_screen(nv50)->cur_ctx == nv50)
-      nv50_context_screen(nv50)->cur_ctx = NULL;
+   if (nv50->screen->cur_ctx == nv50) {
+      nv50->screen->cur_ctx = NULL;
+      /* Save off the state in case another context gets created */
+      nv50->screen->save_state = nv50->state;
+   }
   nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL);
   nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel);

@@ -290,6 +293,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
   pipe->get_sample_position = nv50_context_get_sample_position;

   if (!screen->cur_ctx) {
+      /* Restore the last context's state here, normally handled during
+       * context switch
+       */
+      nv50->state = screen->save_state;
      screen->cur_ctx = nv50;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -104,28 +104,7 @@ struct nv50_context {
   uint32_t dirty;
   boolean cb_dirty;

-   struct {
-      uint32_t instance_elts; /* bitmask of per-instance elements */
-      uint32_t instance_base;
-      uint32_t interpolant_ctrl;
-      uint32_t semantic_color;
-      uint32_t semantic_psize;
-      int32_t index_bias;
-      boolean uniform_buffer_bound[3];
-      boolean prim_restart;
-      boolean point_sprite;
-      boolean rt_serialize;
-      boolean flushed;
-      boolean rasterizer_discard;
-      uint8_t tls_required;
-      boolean new_tls_space;
-      uint8_t num_vtxbufs;
-      uint8_t num_vtxelts;
-      uint8_t num_textures[3];
-      uint8_t num_samplers[3];
-      uint8_t prim_size;
-      uint16_t scissor;
-   } state;
+   struct nv50_graph_state state;

   struct nv50_blend_stateobj *blend;
   struct nv50_rasterizer_stateobj *rast;
@@ -191,12 +170,6 @@ nv50_context(struct pipe_context *pipe)
   return (struct nv50_context *)pipe;
 }

-static INLINE struct nv50_screen *
-nv50_context_screen(struct nv50_context *nv50)
-{
-   return nv50_screen(&nv50->base.screen->base);
-}
-
 /* return index used in nv50_context arrays for a specific shader type */
 static INLINE unsigned
 nv50_context_shader_stage(unsigned pipe)
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -25,10 +25,34 @@ struct nv50_context;

 struct nv50_blitter;

+struct nv50_graph_state {
+   uint32_t instance_elts; /* bitmask of per-instance elements */
+   uint32_t instance_base;
+   uint32_t interpolant_ctrl;
+   uint32_t semantic_color;
+   uint32_t semantic_psize;
+   int32_t index_bias;
+   boolean uniform_buffer_bound[3];
+   boolean prim_restart;
+   boolean point_sprite;
+   boolean rt_serialize;
+   boolean flushed;
+   boolean rasterizer_discard;
+   uint8_t tls_required;
+   boolean new_tls_space;
+   uint8_t num_vtxbufs;
+   uint8_t num_vtxelts;
+   uint8_t num_textures[3];
+   uint8_t num_samplers[3];
+   uint8_t prim_size;
+   uint16_t scissor;
+};
+
 struct nv50_screen {
   struct nouveau_screen base;

   struct nv50_context *cur_ctx;
+   struct nv50_graph_state save_state;

   struct nouveau_bo *code;
   struct nouveau_bo *uniforms;
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -394,6 +394,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)

   if (ctx_from)
      ctx_to->state = ctx_from->state;
+   else
+      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty = ~0;
   ctx_to->viewports_dirty = ~0;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -139,8 +139,12 @@ nvc0_destroy(struct pipe_context *pipe)
 {
   struct nvc0_context *nvc0 = nvc0_context(pipe);

-   if (nvc0->screen->cur_ctx == nvc0)
+   if (nvc0->screen->cur_ctx == nvc0) {
      nvc0->screen->cur_ctx = NULL;
+      nvc0->screen->save_state = nvc0->state;
+      nvc0->screen->save_state.tfb = NULL;
+   }
+
   /* Unset bufctx, we don't want to revalidate any resources after the flush.
    * Other contexts will always set their bufctx again on action calls.
    */
@@ -303,6 +307,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
   pipe->get_sample_position = nvc0_context_get_sample_position;

   if (!screen->cur_ctx) {
+      nvc0->state = screen->save_state;
      screen->cur_ctx = nvc0;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
   }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -113,29 +113,7 @@ struct nvc0_context {
   uint32_t dirty;
   uint32_t dirty_cp; /* dirty flags for compute state */

-   struct {
-      boolean flushed;
-      boolean rasterizer_discard;
-      boolean early_z_forced;
-      boolean prim_restart;
-      uint32_t instance_elts; /* bitmask of per-instance elements */
-      uint32_t instance_base;
-      uint32_t constant_vbos;
-      uint32_t constant_elts;
-      int32_t index_bias;
-      uint16_t scissor;
-      uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
-      uint8_t num_vtxbufs;
-      uint8_t num_vtxelts;
-      uint8_t num_textures[6];
-      uint8_t num_samplers[6];
-      uint8_t tls_required; /* bitmask of shader types using l[] */
-      uint8_t c14_bound; /* whether immediate array constbuf is bound */
-      uint8_t clip_enable;
-      uint32_t clip_mode;
-      uint32_t uniform_buffer_bound[5];
-      struct nvc0_transform_feedback_state *tfb;
-   } state;
+   struct nvc0_graph_state state;

   struct nvc0_blend_stateobj *blend;
   struct nvc0_rasterizer_stateobj *rast;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -392,7 +392,7 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
      break;
   }

-   gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+   gp->hdr[4] = MIN2(info->prop.gp.maxVertices, 1024);

   return nvc0_vtgp_gen_header(gp, info);
 }
@@ -683,11 +683,12 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
   ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
   if (ret) {
      struct nouveau_heap *heap = screen->text_heap;
-      struct nouveau_heap *iter;
-      for (iter = heap; iter && iter->next != heap; iter = iter->next) {
-         struct nvc0_program *evict = iter->priv;
-         if (evict)
-            nouveau_heap_free(&evict->mem);
+      /* Note that the code library, which is allocated before anything else,
+       * does not have a priv pointer. We can stop once we hit it.
+       */
+      while (heap->next && heap->next->priv) {
+         struct nvc0_program *evict = heap->next->priv;
+         nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -27,10 +27,35 @@ struct nvc0_context;

 struct nvc0_blitter;

+struct nvc0_graph_state {
+   boolean flushed;
+   boolean rasterizer_discard;
+   boolean early_z_forced;
+   boolean prim_restart;
+   uint32_t instance_elts; /* bitmask of per-instance elements */
+   uint32_t instance_base;
+   uint32_t constant_vbos;
+   uint32_t constant_elts;
+   int32_t index_bias;
+   uint16_t scissor;
+   uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
+   uint8_t num_vtxbufs;
+   uint8_t num_vtxelts;
+   uint8_t num_textures[6];
+   uint8_t num_samplers[6];
+   uint8_t tls_required; /* bitmask of shader types using l[] */
+   uint8_t c14_bound; /* whether immediate array constbuf is bound */
+   uint8_t clip_enable;
+   uint32_t clip_mode;
+   uint32_t uniform_buffer_bound[5];
+   struct nvc0_transform_feedback_state *tfb;
+};
+
 struct nvc0_screen {
   struct nouveau_screen base;

   struct nvc0_context *cur_ctx;
+   struct nvc0_graph_state save_state;

   int num_occlusion_queries_active;

--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -543,6 +543,8 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)

   if (ctx_from)
      ctx_to->state = ctx_from->state;
+   else
+      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty = ~0;
   ctx_to->viewports_dirty = ~0;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1152,6 +1152,12 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
                      NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 |
                      NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
   }
+   if (nvc0->state.instance_elts) {
+      nvc0->state.instance_elts = 0;
+      BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+      PUSH_DATA (push, n);
+      PUSH_DATA (push, 0);
+   }
   nvc0->state.num_vtxelts = 2;

   for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -27,7 +27,7 @@ using namespace clover;

 event::event(clover::context &ctx, const ref_vector<event> &deps,
             action action_ok, action action_fail) :
-   context(ctx), _status(0), wait_count(1),
+   context(ctx), wait_count(1), _status(0),
   action_ok(action_ok), action_fail(action_fail) {
   for (auto &ev : deps)
      ev.chain(*this);
@@ -36,36 +36,69 @@ event::event(clover::context &ctx, const ref_vector<event> &deps,
 event::~event() {
 }

+std::vector<intrusive_ref<event>>
+event::trigger_self() {
+   std::lock_guard<std::mutex> lock(mutex);
+   std::vector<intrusive_ref<event>> evs;
+
+   if (!--wait_count)
+      std::swap(_chain, evs);
+
+   return evs;
+}
+
 void
 event::trigger() {
-   if (!--wait_count) {
-      action_ok(*this);
+   auto evs = trigger_self();

-      while (!_chain.empty()) {
-         _chain.back()().trigger();
-         _chain.pop_back();
-      }
+   if (signalled()) {
+      action_ok(*this);
+      cv.notify_all();
   }
+
+   for (event &ev : evs)
+      ev.trigger();
+}
+
+std::vector<intrusive_ref<event>>
+event::abort_self(cl_int status) {
+   std::lock_guard<std::mutex> lock(mutex);
+   std::vector<intrusive_ref<event>> evs;
+
+   _status = status;
+   std::swap(_chain, evs);
+
+   return evs;
 }

 void
 event::abort(cl_int status) {
-   _status = status;
+   auto evs = abort_self(status);
+
   action_fail(*this);

-   while (!_chain.empty()) {
-      _chain.back()().abort(status);
-      _chain.pop_back();
-   }
+   for (event &ev : evs)
+      ev.abort(status);
 }

 bool
 event::signalled() const {
+   std::lock_guard<std::mutex> lock(mutex);
   return !wait_count;
 }

+cl_int
+event::status() const {
+   std::lock_guard<std::mutex> lock(mutex);
+   return _status;
+}
+
 void
 event::chain(event &ev) {
+   std::unique_lock<std::mutex> lock(mutex, std::defer_lock);
+   std::unique_lock<std::mutex> lock_ev(ev.mutex, std::defer_lock);
+   std::lock(lock, lock_ev);
+
   if (wait_count) {
      ev.wait_count++;
      _chain.push_back(ev);
@@ -73,6 +106,15 @@ event::chain(event &ev) {
   ev.deps.push_back(*this);
 }

+void
+event::wait() const {
+   for (event &ev : deps)
+      ev.wait();
+
+   std::unique_lock<std::mutex> lock(mutex);
+   cv.wait(lock, [=]{ return !wait_count; });
+}
+
 hard_event::hard_event(command_queue &q, cl_command_type command,
                       const ref_vector<event> &deps, action action) :
   event(q.context(), deps, profile(q, action), [](event &ev){}),
@@ -93,8 +135,8 @@ cl_int
 hard_event::status() const {
   pipe_screen *screen = queue()->device().pipe;

-   if (_status < 0)
-      return _status;
+   if (event::status() < 0)
+      return event::status();

   else if (!_fence)
      return CL_QUEUED;
@@ -120,6 +162,8 @@ void
 hard_event::wait() const {
   pipe_screen *screen = queue()->device().pipe;

+   event::wait();
+
   if (status() == CL_QUEUED)
      queue()->flush();

@@ -182,8 +226,8 @@ soft_event::soft_event(clover::context &ctx, const ref_vector<event> &deps,

 cl_int
 soft_event::status() const {
-   if (_status < 0)
-      return _status;
+   if (event::status() < 0)
+      return event::status();

   else if (!signalled() ||
            any_of([](const event &ev) {
@@ -207,8 +251,7 @@ soft_event::command() const {

 void
 soft_event::wait() const {
-   for (event &ev : deps)
-      ev.wait();
+   event::wait();

   if (status() != CL_COMPLETE)
      throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -23,6 +23,7 @@
 #ifndef CLOVER_CORE_EVENT_HPP
 #define CLOVER_CORE_EVENT_HPP

+#include <condition_variable>
 #include <functional>

 #include "core/object.hpp"
@@ -65,24 +66,29 @@ namespace clover {
      void abort(cl_int status);
      bool signalled() const;

-      virtual cl_int status() const = 0;
+      virtual cl_int status() const;
      virtual command_queue *queue() const = 0;
      virtual cl_command_type command() const = 0;
-      virtual void wait() const = 0;
+      virtual void wait() const;

      const intrusive_ref<clover::context> context;

   protected:
      void chain(event &ev);

-      cl_int _status;
      std::vector<intrusive_ref<event>> deps;

   private:
+      std::vector<intrusive_ref<event>> trigger_self();
+      std::vector<intrusive_ref<event>> abort_self(cl_int status);
+
      unsigned wait_count;
+      cl_int _status;
      action action_ok;
      action action_fail;
      std::vector<intrusive_ref<event>> _chain;
+      mutable std::condition_variable cv;
+      mutable std::mutex mutex;
   };

   ///
--- a/src/gallium/state_trackers/clover/core/queue.cpp
+++ b/src/gallium/state_trackers/clover/core/queue.cpp
@@ -44,6 +44,7 @@ command_queue::flush() {
   pipe_screen *screen = device().pipe;
   pipe_fence_handle *fence = NULL;

+   std::lock_guard<std::mutex> lock(queued_events_mutex);
   if (!queued_events.empty()) {
      pipe->flush(pipe, &fence, 0);

@@ -69,6 +70,7 @@ command_queue::profiling_enabled() const {

 void
 command_queue::sequence(hard_event &ev) {
+   std::lock_guard<std::mutex> lock(queued_events_mutex);
   if (!queued_events.empty())
      queued_events.back()().chain(ev);

--- a/src/gallium/state_trackers/clover/core/queue.hpp
+++ b/src/gallium/state_trackers/clover/core/queue.hpp
@@ -24,6 +24,7 @@
 #define CLOVER_CORE_QUEUE_HPP

 #include <deque>
+#include <mutex>

 #include "core/object.hpp"
 #include "core/context.hpp"
@@ -69,6 +70,7 @@ namespace clover {

      cl_command_queue_properties props;
      pipe_context *pipe;
+      std::mutex queued_events_mutex;
      std::deque<intrusive_ref<hard_event>> queued_events;
   };
 }
--- a/src/gallium/state_trackers/dri/dri_context.c
+++ b/src/gallium/state_trackers/dri/dri_context.c
@@ -233,11 +233,10 @@ dri_make_current(__DRIcontext * cPriv,

   ctx->stapi->make_current(ctx->stapi, ctx->st, &draw->base, &read->base);

-   // This is ok to call here. If they are already init, it's a no-op.
-   if (draw->textures[ST_ATTACHMENT_BACK_LEFT] && draw->textures[ST_ATTACHMENT_DEPTH_STENCIL]
-      && ctx->pp)
-         pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0,
-            draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0);
+   /* This is ok to call here. If they are already init, it's a no-op. */
+   if (ctx->pp && draw->textures[ST_ATTACHMENT_BACK_LEFT])
+      pp_init_fbos(ctx->pp, draw->textures[ST_ATTACHMENT_BACK_LEFT]->width0,
+                   draw->textures[ST_ATTACHMENT_BACK_LEFT]->height0);

   return GL_TRUE;
 }
--- a/src/gallium/targets/osmesa/Makefile.am
+++ b/src/gallium/targets/osmesa/Makefile.am
@@ -42,7 +42,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp
 lib@OSMESA_LIB@_la_SOURCES = target.c

 lib@OSMESA_LIB@_la_LDFLAGS = \
-	-module \
 	-no-undefined \
 	-version-number @OSMESA_VERSION@ \
 	$(GC_SECTIONS) \
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2570,8 +2570,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
            link_intrastage_shaders(mem_ctx, ctx, prog, shader_list[stage],
                                    num_shaders[stage]);

-         if (!prog->LinkStatus)
+         if (!prog->LinkStatus) {
+            if (sh)
+               ctx->Driver.DeleteShader(ctx, sh);
            goto done;
+         }

         switch (stage) {
         case MESA_SHADER_VERTEX:
@@ -2584,8 +2587,11 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
            validate_fragment_shader_executable(prog, sh);
            break;
         }
-         if (!prog->LinkStatus)
+         if (!prog->LinkStatus) {
+            if (sh)
+               ctx->Driver.DeleteShader(ctx, sh);
            goto done;
+         }

         _mesa_reference_shader(ctx, &prog->_LinkedShaders[stage], sh);
      }
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -1985,6 +1985,11 @@ dri3_create_screen(int screen, struct glx_display * priv)
      goto handle_error;
   }

+   if (psc->is_different_gpu && !psc->image->blitImage) {
+      ErrorMessageF("Different GPU, but blitImage not implemented for this driver\n");
+      goto handle_error;
+   }
+
   if (!psc->is_different_gpu && (
       !psc->texBuffer || psc->texBuffer->base.version < 2 ||
       !psc->texBuffer->setTexBuffer2
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1036,7 +1036,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return inst->mlen;
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
-      return 2;
+      return inst->mlen;
   case SHADER_OPCODE_UNTYPED_ATOMIC:
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
   case SHADER_OPCODE_URB_WRITE_SIMD8:
--- a/src/mesa/drivers/dri/i965/gen8_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_depth_state.c
@@ -412,6 +412,16 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
   uint32_t surface_width  = ALIGN(mt->logical_width0,  level == 0 ? 8 : 1);
   uint32_t surface_height = ALIGN(mt->logical_height0, level == 0 ? 4 : 1);

+   /* From the documentation for 3DSTATE_WM_HZ_OP: "3DSTATE_MULTISAMPLE packet
+    * must be used prior to this packet to change the Number of Multisamples.
+    * This packet must not be used to change Number of Multisamples in a
+    * rendering sequence."
+    */
+   if (brw->num_samples != mt->num_samples) {
+      gen8_emit_3dstate_multisample(brw, mt->num_samples);
+      brw->NewGLState |= _NEW_MULTISAMPLE;
+   }
+
   /* The basic algorithm is:
    * - If needed, emit 3DSTATE_{DEPTH,HIER_DEPTH,STENCIL}_BUFFER and
    *   3DSTATE_CLEAR_PARAMS packets to set up the relevant buffers.
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -226,8 +226,30 @@ intelReadPixels(struct gl_context * ctx,

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1,
-                                        format, type, pixels, pack))
+                                        format, type, pixels, pack)) {
+         /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by
+          * binding the user-provided BO as a fake framebuffer and rendering
+          * to it.  This breaks the invariant of the GL that nothing is able
+          * to render to a BO, causing nondeterministic corruption issues
+          * because the render cache is not coherent with a number of other
+          * caches that the BO could potentially be bound to afterwards.
+          *
+          * This could be solved in the same way that we guarantee texture
+          * coherency after a texture is attached to a framebuffer and
+          * rendered to, but that would involve checking *all* BOs bound to
+          * the pipeline for the case we need to emit a cache flush due to
+          * previous rendering to any of them -- Including vertex, index,
+          * uniform, atomic counter, shader image, transform feedback,
+          * indirect draw buffers, etc.
+          *
+          * That would increase the per-draw call overhead even though it's
+          * very unlikely that any of the BOs bound to the pipeline has been
+          * rendered to via a PBO at any point, so it seems better to just
+          * flush here unconditionally.
+          */
+         intel_batchbuffer_emit_mi_flush(brw);
         return;
+      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
   }
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -476,8 +476,15 @@ intel_get_tex_image(struct gl_context *ctx,
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
                                        texImage->Width, texImage->Height,
                                        texImage->Depth, format, type,
-                                        pixels, &ctx->Pack))
+                                        pixels, &ctx->Pack)) {
+         /* Flush to guarantee coherency between the render cache and other
+          * caches the PBO could potentially be bound to after this point.
+          * See the related comment in intelReadPixels() for a more detailed
+          * explanation.
+          */
+         intel_batchbuffer_emit_mi_flush(brw);
         return;
+      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
   }
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -60,6 +60,11 @@
 #include "swrast_priv.h"
 #include "swrast/s_context.h"

+#include <sys/types.h>
+#ifdef HAVE_SYS_SYSCTL_H
+# include <sys/sysctl.h>
+#endif
+
 const __DRIextension **__driDriverGetExtensions_swrast(void);

 const char * const swrast_vendor_string = "Mesa Project";
@@ -136,6 +141,16 @@ swrast_query_renderer_integer(__DRIscreen *psp, int param,
      value[0] = 0;
      return 0;
   case __DRI2_RENDERER_VIDEO_MEMORY: {
+      /* This should probably share code with os_get_total_physical_memory()
+       * from src/gallium/auxiliary/os/os_misc.c
+       */
+#if defined(CTL_HW) && defined(HW_MEMSIZE)
+        int mib[2] = { CTL_HW, HW_MEMSIZE };
+        unsigned long system_memory_bytes;
+        size_t len = sizeof(system_memory_bytes);
+        if (sysctl(mib, 2, &system_memory_bytes, &len, NULL, 0) != 0)
+            return -1;
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE)
      /* XXX: Do we want to return the full amount of system memory ? */
      const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
      const long system_page_size = sysconf(_SC_PAGE_SIZE);
@@ -145,6 +160,9 @@ swrast_query_renderer_integer(__DRIscreen *psp, int param,

      const uint64_t system_memory_bytes = (uint64_t) system_memory_pages
         * (uint64_t) system_page_size;
+#else
+#error "Unsupported platform"
+#endif

      const unsigned system_memory_megabytes =
         (unsigned) (system_memory_bytes / (1024 * 1024));
--- a/src/mesa/drivers/osmesa/Makefile.am
+++ b/src/mesa/drivers/osmesa/Makefile.am
@@ -39,7 +39,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp
 lib@OSMESA_LIB@_la_SOURCES = osmesa.c

 lib@OSMESA_LIB@_la_LDFLAGS = \
-	-module \
 	-no-undefined \
 	-version-number @OSMESA_VERSION@ \
 	$(GC_SECTIONS) \
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -2387,7 +2387,7 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
      GLboolean err = GL_TRUE;

      texObj = _mesa_lookup_texture(ctx, texture);
-      if (texObj != NULL) {
+      if (texObj != NULL && texObj->Target != 0) {
         if (textarget == 0) {
            if (layered) {
               /* We're being called by glFramebufferTexture() and textarget
@@ -2438,10 +2438,19 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
         }
      }
      else {
-         /* can't render to a non-existant texture */
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "glFramebufferTexture%s(non existant texture)",
-                     caller);
+         /* Can't render to a non-existent texture object.
+          *
+          * The OpenGL 4.5 core spec (02.02.2015) in Section 9.2 Binding and
+          * Managing Framebuffer Objects specifies a different error
+          * depending upon the calling function (PDF pages 325-328).
+          * *FramebufferTexture (where layered = GL_TRUE) throws invalid
+          * value, while the other commands throw invalid operation (where
+          * layered = GL_FALSE).
+          */
+         const GLenum error = layered ? GL_INVALID_VALUE :
+                              GL_INVALID_OPERATION;
+         _mesa_error(ctx, error,
+                     "%s(non-existent texture %u)", caller, texture);
         return;
      }

@@ -2452,6 +2461,18 @@ framebuffer_texture(struct gl_context *ctx, const char *caller, GLenum target,
         return;
      }

+      /* Page 306 (page 328 of the PDF) of the OpenGL 4.5 (Core Profile)
+       * spec says:
+       *
+       *    "An INVALID_VALUE error is generated if texture is non-zero
+       *     and layer is negative."
+       */
+      if (zoffset < 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE,
+                     "glFramebufferTexture%s(layer %u < 0)", caller, zoffset);
+         return;
+      }
+
      if (texObj->Target == GL_TEXTURE_3D) {
         const GLuint maxSize = 1 << (ctx->Const.Max3DTextureLevels - 1);
         if (zoffset >= maxSize) {
@@ -3062,12 +3083,29 @@ invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments,
      return;
   }

+   /* Section 17.4 Whole Framebuffer Operations of the OpenGL 4.5 Core
+    * Spec (2.2.2015, PDF page 522) says:
+    *    "An INVALID_VALUE error is generated if numAttachments, width, or
+    *    height is negative."
+    */
   if (numAttachments < 0) {
      _mesa_error(ctx, GL_INVALID_VALUE,
                  "%s(numAttachments < 0)", name);
      return;
   }

+   if (width < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "%s(width < 0)", name);
+      return;
+   }
+
+   if (height < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "%s(height < 0)", name);
+      return;
+   }
+
   /* The GL_ARB_invalidate_subdata spec says:
    *
    *     "If an attachment is specified that does not exist in the
@@ -3160,7 +3198,8 @@ invalidate_framebuffer_storage(GLenum target, GLsizei numAttachments,
   return;

 invalid_enum:
-   _mesa_error(ctx, GL_INVALID_ENUM, "%s(attachment)", name);
+   _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid attachment %s)", name,
+               _mesa_lookup_enum_by_nr(attachments[i]));
   return;
 }

--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -135,7 +135,7 @@ static GLboolean check_active_shininess( struct gl_context *ctx,
       (key->light_color_material_mask & (1 << attr)))
      return GL_TRUE;

-   if (key->varying_vp_inputs & VERT_ATTRIB_GENERIC(attr))
+   if (key->varying_vp_inputs & VERT_BIT_GENERIC(attr))
      return GL_TRUE;

   if (ctx->Light.Material.Attrib[attr][0] != 0.0F)
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -1911,6 +1911,7 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
      if (index >= ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs)
          goto invalid_value;
      v->value_int = ctx->Array.VAO->VertexBinding[VERT_ATTRIB_GENERIC(index)].Stride;
+      return TYPE_INT;

   /* ARB_shader_image_load_store */
   case GL_IMAGE_BINDING_NAME: {
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -1832,7 +1832,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
      break;
   case ir_unop_i2b:
      if (native_integers)
-         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+         emit(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
      else
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
      break;
@@ -5339,6 +5339,7 @@ get_mesa_program(struct gl_context *ctx,
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->LinkStatus) {
+      free_glsl_to_tgsi_visitor(v);
      return NULL;
   }
@@ -1 +1 @@
 .5.5
 .5.7