docs: add release notes for 11.0.4

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Update version to 11.0.4
2015-10-24 19:34:01 +01:00 · 2015-10-24 19:29:27 +01:00 · 2015-10-21 14:23:22 +01:00 · 2015-10-21 14:23:22 +01:00 · 2015-10-21 14:23:22 +01:00 · 2015-10-21 14:23:22 +01:00
112 changed files with 2246 additions and 937 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.1
+11.0.4
--- a/configure.ac
+++ b/configure.ac
@@ -106,6 +106,8 @@ AC_SYS_LARGEFILE
 LT_PREREQ([2.2])
 LT_INIT([disable-static])

+AC_CHECK_PROG(RM, rm, [rm -f])
+
 AX_PROG_BISON([],
              AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
                    [AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
--- a/docs/relnotes/11.0.1.html
+++ b/docs/relnotes/11.0.1.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+6dab262877e12c0546a0e2970c6835a0f217e6d4026ccecb3cd5dd733d1ce867  mesa-11.0.1.tar.gz
+43d0dfcd1f1e36f07f8228cd76d90175d3fc74c1ed25d7071794a100a98ef2a6  mesa-11.0.1.tar.xz
 </pre>


--- a/docs/relnotes/11.0.2.html
+++ b/docs/relnotes/11.0.2.html
@@ -0,0 +1,85 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.2 Release Notes / September 28, 2015</h1>
+
+<p>
+Mesa 11.0.2 is a bug fix release which fixes bugs found since the 11.0.1 release.
+</p>
+<p>
+Mesa 11.0.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20  mesa-11.0.2.tar.gz
+fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4  mesa-11.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91582">Bug 91582</a> - [bisected] Regression in DEQP gles2.functional.negative_api.texture.texsubimage2d_neg_offset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+  <li>mesa: Fix order of format+type and internal format checks for glTexImageXD ops</li>
+  <li>mesa: Move _mesa_base_tex_format() from teximage to glformats files</li>
+  <li>mesa: Use the effective internal format instead for validation</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.1</li>
+  <li>Update version to 11.0.2</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.3.html
+++ b/docs/relnotes/11.0.3.html
@@ -0,0 +1,185 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.3 Release Notes / October 10, 2015</h1>
+
+<p>
+Mesa 11.0.3 is a bug fix release which fixes bugs found since the 11.0.2 release.
+</p>
+<p>
+Mesa 11.0.3 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c2210e3daecc10ed9fdcea500327652ed6effc2f47c4b9cee63fb08f560d7117  mesa-11.0.3.tar.gz
+ab2992eece21adc23c398720ef8c6933cb69ea42e1b2611dc09d031e17e033d6  mesa-11.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71789">Bug 71789</a> - [r300g] Visuals not found in (default) depth = 24</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91044">Bug 91044</a> - piglit spec/egl_khr_create_context/valid debug flag gles* fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91718">Bug 91718</a> - piglit.spec.arb_shader_image_load_store.invalid causes intermittent GPU HANG</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92265">Bug 92265</a> - Black windows in weston after update mesa to 11.0.2-1</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: Add abs input modifier to base for POW in ffvertex_prog</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.2</li>
+  <li>Revert "nouveau: make sure there's always room to emit a fence"</li>
+  <li>Update version to 11.0.3</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>i965/fs: Fix hang on IVB and VLV with image format mismatch.</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>meta: Handle array textures in scaled MSAA blits</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>nouveau: be more careful about freeing temporary transfer buffers</li>
+  <li>nouveau: delay deleting buffer with unflushed fence</li>
+  <li>nouveau: wait to unref the transfer's bo until it's no longer used</li>
+  <li>nv30: pretend to have packed texture/surface formats</li>
+  <li>nv30: always go through translate module on big-endian</li>
+  <li>nouveau: make sure there's always room to emit a fence</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>mesa: Correctly handle GL_BGRA_EXT in ES3 format_and_type checks</li>
+</ul>
+
+<p>Kyle Brenneman (3):</p>
+<ul>
+  <li>glx: Fix build errors with --enable-mangling (v2)</li>
+  <li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
+  <li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: fix vui time_scale zero error</li>
+</ul>
+
+<p>Marek Olšák (21):</p>
+<ul>
+  <li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
+  <li>radeonsi: handle index buffer alloc failures</li>
+  <li>radeonsi: handle constant buffer alloc failures</li>
+  <li>gallium/radeon: handle buffer_map staging buffer failures better</li>
+  <li>gallium/radeon: handle buffer alloc failures in r600_draw_rectangle</li>
+  <li>gallium/radeon: add a fail path for depth MSAA texture readback</li>
+  <li>radeonsi: report alloc failure from si_shader_binary_read</li>
+  <li>radeonsi: add malloc fail paths to si_create_shader_state</li>
+  <li>radeonsi: skip drawing if the tess factor ring allocation fails</li>
+  <li>radeonsi: skip drawing if GS ring allocations fail</li>
+  <li>radeonsi: handle shader precompile failures</li>
+  <li>radeonsi: handle fixed-func TCS shader create failure</li>
+  <li>radeonsi: skip drawing if VS, TCS, TES, GS fail to compile or upload</li>
+  <li>radeonsi: skip drawing if PS fails to compile or upload</li>
+  <li>radeonsi: skip drawing if updating the scratch buffer fails</li>
+  <li>radeonsi: don't forget to update scratch relocations for LS, HS, ES shaders</li>
+  <li>radeonsi: handle dummy constant buffer allocation failure</li>
+  <li>gallium/u_blitter: handle allocation failures</li>
+  <li>radeonsi: add scratch buffer to the buffer list when it's re-allocated</li>
+  <li>st/dri: don't use _ctx in client_wait_sync</li>
+  <li>egl/dri2: don't require a context for ClientWaitSync (v2)</li>
+</ul>
+
+<p>Matthew Waters (1):</p>
+<ul>
+  <li>egl: rework handling EGL_CONTEXT_FLAGS</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>st/dri: Use packed RGB formats</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>mesa: fix mipmap generation for immutable, compressed textures</li>
+</ul>
+
+<p>Tom Stellard (3):</p>
+<ul>
+  <li>gallium/radeon: Use call_once() when initailizing LLVM targets</li>
+  <li>gallivm: Allow drivers and state trackers to initialize gallivm LLVM targets v2</li>
+  <li>radeon/llvm: Initialize gallivm targets when initializing the AMDGPU target v2</li>
+</ul>
+
+<p>Varad Gautam (1):</p>
+<ul>
+  <li>egl: restore surface type before linking config to its display</li>
+</ul>
+
+<p>Ville Syrjälä (3):</p>
+<ul>
+  <li>i830: Fix collision between I830_UPLOAD_RASTER_RULES and I830_UPLOAD_TEX(0)</li>
+  <li>i915: Fix texcoord vs. varying collision in fragment programs</li>
+  <li>i915: Remember to call intel_prepare_render() before blitting</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.4.html
+++ b/docs/relnotes/11.0.4.html
@@ -0,0 +1,167 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.4 Release Notes / October 24, 2015</h1>
+
+<p>
+Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release.
+</p>
+<p>
+Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (2):</p>
+<ul>
+  <li>i965/vec4: check writemask when bailing out at register coalesce</li>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Brian Paul (2):</p>
+<ul>
+  <li>vbo: fix incorrect switch statement in init_mat_currval()</li>
+  <li>mesa: fix incorrect opcode in save_BlendFunci()</li>
+</ul>
+
+<p>Chih-Wei Huang (3):</p>
+<ul>
+  <li>mesa: android: Fix the incorrect path of sse_minmax.c</li>
+  <li>nv50/ir: use C++11 standard std::unordered_map if possible</li>
+  <li>nv30: include the header of ffs prototype</li>
+</ul>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>i965: Remove early release of DRI2 miptree</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/uniforms: fix get_uniform for doubles (v2)</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.3</li>
+</ul>
+
+<p>Francisco Jerez (5):</p>
+<ul>
+  <li>i965: Don't tell the hardware about our UAV access.</li>
+  <li>mesa: Expose function to calculate whether a shader image unit is valid.</li>
+  <li>mesa: Skip redundant texture completeness checking during image validation.</li>
+  <li>i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.</li>
+  <li>mesa: Get rid of texture-dependent image unit derived state.</li>
+</ul>
+
+<p>Ian Romanick (8):</p>
+<ul>
+  <li>glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00</li>
+  <li>ff_fragment_shader: Use binding to set the sampler unit</li>
+  <li>glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms</li>
+  <li>glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform</li>
+  <li>glsl: Only set ir_variable::constant_value for const-decorated variables</li>
+  <li>glsl: Restrict initializers for global variables to constant expression in ES</li>
+  <li>glsl: Add method to determine whether an expression contains the sequence operator</li>
+  <li>glsl: In later GLSL versions, sequence operator is cannot be a constant expression</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nouveau: make sure there's always room to emit a fence</li>
+</ul>
+
+<p>Indrajit Das (1):</p>
+<ul>
+  <li>st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage</li>
+</ul>
+
+<p>Jonathan Gray (1):</p>
+<ul>
+  <li>configure.ac: ensure RM is set</li>
+</ul>
+
+<p>Krzysztof Sobiecki (1):</p>
+<ul>
+  <li>st/fbo: use pipe_surface_release instead of pipe_surface_reference</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>st/omx/dec/h264: fix field picture type 0 poc disorder</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>st/mesa: fix clip state dependencies</li>
+  <li>radeonsi: fix a GS copy shader leak</li>
+  <li>gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>u_vbuf: fix vb slot assignment for translated buffers</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno/a3xx: cache-flush is needed after MEM_WRITE</li>
+</ul>
+
+<p>Tapani Pälli (3):</p>
+<ul>
+  <li>mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span</li>
+  <li>mesa: Set api prefix to version string when overriding version</li>
+  <li>mesa: fix ARRAY_SIZE query for GetProgramResourceiv</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -312,6 +312,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
         else
            conf->dri_single_config = dri_config;
      }
+
+      conf->base.SurfaceType = 0;
      conf->base.ConfigID = config_id;

      _eglLinkConfig(&conf->base);
@@ -2384,13 +2386,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
   unsigned wait_flags = 0;
   EGLint ret = EGL_CONDITION_SATISFIED_KHR;

-   if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
+   /* The EGL_KHR_fence_sync spec states:
+    *
+    *    "If no context is current for the bound API,
+    *     the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored.
+    */
+   if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
      wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;

   /* the sync object should take a reference while waiting */
   dri2_egl_ref_sync(dri2_sync);

-   if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
+   if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
                                         dri2_sync->fence, wait_flags,
                                         timeout))
      dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -152,12 +152,51 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,

         /* The EGL_KHR_create_context spec says:
          *
-          *     "Flags are only defined for OpenGL context creation, and
-          *     specifying a flags value other than zero for other types of
-          *     contexts, including OpenGL ES contexts, will generate an
-          *     error."
+          *     "If the EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR flag bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a <debug context> will be created.
+          *     [...]
+          *     In some cases a debug context may be identical to a non-debug
+          *     context. This bit is supported for OpenGL and OpenGL ES
+          *     contexts."
          */
-         if (api != EGL_OPENGL_API && val != 0) {
+         if ((val & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR) &&
+             (api != EGL_OPENGL_API && api != EGL_OPENGL_ES_API)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR flag bit
+          *     is set in EGL_CONTEXT_FLAGS_KHR, then a <forward-compatible>
+          *     context will be created. Forward-compatible contexts are
+          *     defined only for OpenGL versions 3.0 and later. They must not
+          *     support functionality marked as <deprecated> by that version of
+          *     the API, while a non-forward-compatible context must support
+          *     all functionality in that version, deprecated or not. This bit
+          *     is supported for OpenGL contexts, and requesting a
+          *     forward-compatible context for OpenGL versions less than 3.0
+          *     will generate an error."
+          */
+         if ((val & EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR) &&
+             (api != EGL_OPENGL_API || ctx->ClientMajorVersion < 3)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context_spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
+          *     access> will be created. Robust buffer access is defined in the
+          *     GL_ARB_robustness extension specification, and the resulting
+          *     context must also support either the GL_ARB_robustness
+          *     extension, or a version of OpenGL incorporating equivalent
+          *     functionality. This bit is supported for OpenGL contexts.
+          */
+         if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
+             (api != EGL_OPENGL_API ||
+              !dpy->Extensions.EXT_create_context_robustness)) {
            err = EGL_BAD_ATTRIBUTE;
            break;
         }
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   }
   /* if we get here, we missed a shader cap above (and should have seen
    * a compiler warning.)
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -81,6 +81,8 @@
 #  pragma pop_macro("DEBUG")
 #endif

+#include "c11/threads.h"
+#include "os/os_thread.h"
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
@@ -103,6 +105,33 @@ static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;

 }

+static once_flag init_native_targets_once_flag;
+
+static void init_native_targets()
+{
+   // If we have a native target, initialize it to ensure it is linked in and
+   // usable by the JIT.
+   llvm::InitializeNativeTarget();
+
+   llvm::InitializeNativeTargetAsmPrinter();
+
+   llvm::InitializeNativeTargetDisassembler();
+}
+
+/**
+ * The llvm target registry is not thread-safe, so drivers and state-trackers
+ * that want to initialize targets should use the gallivm_init_llvm_targets()
+ * function to safely initialize targets.
+ *
+ * LLVM targets should be initialized before the driver or state-tracker tries
+ * to access the registry.
+ */
+extern "C" void
+gallivm_init_llvm_targets(void)
+{
+   call_once(&init_native_targets_once_flag, init_native_targets);
+}
+
 extern "C" void
 lp_set_target_options(void)
 {
@@ -115,13 +144,7 @@ lp_set_target_options(void)
   llvm::DisablePrettyStackTrace = true;
 #endif

-   // If we have a native target, initialize it to ensure it is linked in and
-   // usable by the JIT.
-   llvm::InitializeNativeTarget();
-
-   llvm::InitializeNativeTargetAsmPrinter();
-
-   llvm::InitializeNativeTargetDisassembler();
+   gallivm_init_llvm_targets();
 }


--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -41,6 +41,8 @@ extern "C" {

 struct lp_generated_code;

+extern void
+gallivm_init_llvm_targets(void);

 extern void
 lp_set_target_options(void);
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -463,6 +463,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   }
   /* if we get here, we missed a shader cap above (and should have seen
    * a compiler warning.)
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,

   u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      return;
   u_upload_unmap(ctx->upload);

   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      goto out;
+
   vb.stride = 0;

   blitter_set_running_flag(ctx);
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);

+out:
   blitter_restore_vertex_states(ctx);
   blitter_restore_render_cond(ctx);
   blitter_unset_running_flag(ctx);
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -545,6 +545,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
+         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -355,6 +355,10 @@ to be 0.
  are supported.
 * ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
  ignore tgsi_declaration_range::Last for shader inputs and outputs.
+* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: This is the maximum number
+  of iterations that loops are allowed to have to be unrolled. It is only
+  a hint to state trackers. Whether any loops will be unrolled is not
+  guaranteed.


 .. _pipe_compute_cap:
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -828,11 +828,7 @@ fd3_emit_restore(struct fd_context *ctx)
 	OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
 			A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));

-	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
-	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
-	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
-			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
-			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+	fd3_emit_cache_flush(ctx, ring);

 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 	OUT_RING(ring, 0x00000000);                  /* GRAS_CL_CLIP_CNTL */
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -90,4 +90,15 @@ void fd3_emit_restore(struct fd_context *ctx);

 void fd3_emit_init(struct pipe_context *pctx);

+static inline void
+fd3_emit_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+	fd_wfi(ctx, ring);
+	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+}
+
 #endif /* FD3_EMIT_H */
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -558,6 +558,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, fui(x1));
 	OUT_RING(ring, fui(y1));

+	fd3_emit_cache_flush(ctx, ring);
+
 	for (i = 0; i < 4; i++) {
 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -407,6 +407,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
 		return 16;
 	case PIPE_SHADER_CAP_PREFERRED_IR:
 		return PIPE_SHADER_IR_TGSI;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		return 32;
 	}
 	debug_printf("unknown shader param %d\n", param);
 	return 0;
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      default:
         debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
         return 0;
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
      return PIPE_SHADER_IR_TGSI;
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 1;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;

   default:
      return 0;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,10 +25,24 @@

 #include <stack>
 #include <limits>
+#if __cplusplus >= 201103L
+#include <unordered_map>
+#else
 #include <tr1/unordered_map>
+#endif

 namespace nv50_ir {

+#if __cplusplus >= 201103L
+using std::hash;
+using std::unordered_map;
+#elif !defined(ANDROID)
+using std::tr1::hash;
+using std::tr1::unordered_map;
+#else
+#error Android release before Lollipop is not supported!
+#endif
+
 #define MAX_REGISTER_FILE_SIZE 256

 class RegisterSet
@@ -349,12 +363,12 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)

 struct PhiMapHash {
   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
-      return std::tr1::hash<Instruction*>()(val.first) * 31 +
-         std::tr1::hash<BasicBlock*>()(val.second);
+      return hash<Instruction*>()(val.first) * 31 +
+         hash<BasicBlock*>()(val.second);
   }
 };

-typedef std::tr1::unordered_map<
+typedef unordered_map<
   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;

 // Critical edges need to be split up so that work can be inserted along
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -80,7 +80,12 @@ release_allocation(struct nouveau_mm_allocation **mm,
 inline void
 nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
 {
-   nouveau_bo_ref(NULL, &buf->bo);
+   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
+      buf->bo = NULL;
+   } else {
+      nouveau_bo_ref(NULL, &buf->bo);
+   }

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);
@@ -281,7 +286,8 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
 {
   if (tx->map) {
      if (likely(tx->bo)) {
-         nouveau_bo_ref(NULL, &tx->bo);
+         nouveau_fence_work(nv->screen->fence.current,
+                            nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
@@ -782,7 +788,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

-      nouveau_bo_ref(NULL, &bo);
+      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -190,8 +190,14 @@ nouveau_fence_wait(struct nouveau_fence *fence)
   /* wtf, someone is waiting on a fence in flush_notify handler? */
   assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);

-   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
-      nouveau_fence_emit(fence);
+   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
+      PUSH_SPACE(screen->pushbuf, 8);
+      /* The space allocation might trigger a flush, which could emit the
+       * current fence. So check again.
+       */
+      if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+         nouveau_fence_emit(fence);
+   }

   if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
      if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
@@ -224,10 +230,22 @@ nouveau_fence_wait(struct nouveau_fence *fence)
 void
 nouveau_fence_next(struct nouveau_screen *screen)
 {
-   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
-      nouveau_fence_emit(screen->fence.current);
+   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) {
+      if (screen->fence.current->ref > 1)
+         nouveau_fence_emit(screen->fence.current);
+      else
+         return;
+   }

   nouveau_fence_ref(NULL, &screen->fence.current);

   nouveau_fence_new(screen, &screen->fence.current, false);
 }
+
+void
+nouveau_fence_unref_bo(void *data)
+{
+   struct nouveau_bo *bo = data;
+
+   nouveau_bo_ref(NULL, &bo);
+}
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -37,6 +37,9 @@ void nouveau_fence_next(struct nouveau_screen *);
 bool nouveau_fence_wait(struct nouveau_fence *);
 bool nouveau_fence_signalled(struct nouveau_fence *);

+void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
+
+
 static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push)
 static inline bool
 PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
 {
+   /* Provide a buffer so that fences always have room to be emitted */
+   size += 8;
   if (PUSH_AVAIL(push) < size)
      return nouveau_pushbuf_space(push, size, 0, 0) == 0;
   return true;
--- a/src/gallium/drivers/nouveau/nv30/nv30_format.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_format.c
@@ -78,12 +78,12 @@ nv30_format_info_table[PIPE_FORMAT_COUNT] = {
   _(B4G4R4X4_UNORM      , S___),
   _(B4G4R4A4_UNORM      , S___),
   _(B5G6R5_UNORM        , SB__),
-   _(B8G8R8X8_UNORM      , SB__),
-   _(B8G8R8X8_SRGB       , S___),
-   _(B8G8R8A8_UNORM      , SB__),
-   _(B8G8R8A8_SRGB       , S___),
+   _(BGRX8888_UNORM      , SB__),
+   _(BGRX8888_SRGB       , S___),
+   _(BGRA8888_UNORM      , SB__),
+   _(BGRA8888_SRGB       , S___),
   _(R8G8B8A8_UNORM      , __V_),
-   _(R8G8B8A8_SNORM      , S___),
+   _(RGBA8888_SNORM      , S___),
   _(DXT1_RGB            , S___),
   _(DXT1_SRGB           , S___),
   _(DXT1_RGBA           , S___),
@@ -138,8 +138,8 @@ const struct nv30_format
 nv30_format_table[PIPE_FORMAT_COUNT] = {
   R_(B5G5R5X1_UNORM    , X1R5G5B5          ),
   R_(B5G6R5_UNORM      , R5G6B5            ),
-   R_(B8G8R8X8_UNORM    , X8R8G8B8          ),
-   R_(B8G8R8A8_UNORM    , A8R8G8B8          ),
+   R_(BGRX8888_UNORM    , X8R8G8B8          ),
+   R_(BGRA8888_UNORM    , A8R8G8B8          ),
   Z_(Z16_UNORM         , Z16               ),
   Z_(X8Z24_UNORM       , Z24S8             ),
   Z_(S8_UINT_Z24_UNORM , Z24S8             ),
@@ -223,11 +223,11 @@ nv30_texfmt_table[PIPE_FORMAT_COUNT] = {
   _(B4G4R4X4_UNORM    , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
   _(B4G4R4A4_UNORM    , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
   _(B5G6R5_UNORM      , R5G6B5  , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
-   _(B8G8R8X8_UNORM    , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
-   _(B8G8R8X8_SRGB     , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
-   _(B8G8R8A8_UNORM    , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
-   _(B8G8R8A8_SRGB     , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
-   _(R8G8B8A8_SNORM    , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
+   _(BGRX8888_UNORM    , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
+   _(BGRX8888_SRGB     , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
+   _(BGRA8888_UNORM    , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
+   _(BGRA8888_SRGB     , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
+   _(RGBA8888_SNORM    , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
   _(DXT1_RGB          , DXT1    , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
   _(DXT1_SRGB         , DXT1    , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
   _(DXT1_RGBA         , DXT1    , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -339,10 +339,15 @@ nv30_miptree_transfer_unmap(struct pipe_context *pipe,
   struct nv30_context *nv30 = nv30_context(pipe);
   struct nv30_transfer *tx = nv30_transfer(ptx);

-   if (ptx->usage & PIPE_TRANSFER_WRITE)
+   if (ptx->usage & PIPE_TRANSFER_WRITE) {
      nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);

-   nouveau_bo_ref(NULL, &tx->tmp.bo);
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv30->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->tmp.bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->tmp.bo);
+   }
   pipe_resource_reference(&ptx->resource, NULL);
   FREE(tx);
 }
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -261,6 +261,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      default:
         debug_printf("unknown vertex shader param %d\n", param);
         return 0;
@@ -302,6 +304,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      default:
         debug_printf("unknown fragment shader param %d\n", param);
         return 0;
@@ -345,7 +349,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)

   *sequence = ++screen->base.fence.sequence;

-   BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
+   assert(PUSH_AVAIL(push) >= 3);
+   PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
+              (2 /* size */ << 18) | (7 /* subchan */ << 13));
   PUSH_DATA (push, 0);
   PUSH_DATA (push, *sequence);
 }
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -191,7 +191,11 @@ nv30_vbo_validate(struct nv30_context *nv30)
   if (!nv30->vertex || nv30->draw_flags)
      return;

+#ifdef PIPE_ARCH_BIG_ENDIAN
+   if (1) { /* Figure out where the buffers are getting messed up */
+#else
   if (unlikely(vertex->need_conversion)) {
+#endif
      nv30->vbo_fifo = ~0;
      nv30->vbo_user = 0;
   } else {
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -1,3 +1,4 @@
+#include <strings.h>
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -163,7 +163,10 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
 {
   struct nv50_miptree *mt = nv50_miptree(pt);

-   nouveau_bo_ref(NULL, &mt->base.bo);
+   if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+      nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
+   else
+      nouveau_bo_ref(NULL, &mt->base.bo);

   nouveau_fence_ref(NULL, &mt->base.fence);
   nouveau_fence_ref(NULL, &mt->base.fence_wr);
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -297,6 +297,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   default:
      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
      return 0;
@@ -386,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

+   assert(PUSH_AVAIL(push) >= 5);
   PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
            }
            while (words) {
-               unsigned nr;
-
-               if (!PUSH_SPACE(push, 16))
-                  break;
-               nr = PUSH_AVAIL(push);
-               assert(nr >= 16);
-               nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
+               unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

+               PUSH_SPACE(push, nr + 3);
               BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
               PUSH_DATA (push, (start << 8) | b);
               BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
   PUSH_DATA (push, 0);

   while (count) {
-      unsigned nr;
-
-      if (!PUSH_SPACE(push, 16))
-         break;
-      nr = PUSH_AVAIL(push);
-      assert(nr >= 16);
-      nr = MIN2(count, nr - 1);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+      unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);

      BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
      PUSH_DATAp(push, src, nr);
@@ -365,9 +358,14 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
            tx->rect[0].base += mt->layer_stride;
         tx->rect[1].base += tx->nblocksy * tx->base.stride;
      }
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv50->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
   }

-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
   pipe_resource_reference(&transfer->resource, NULL);

   FREE(tx);
@@ -390,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
   nouveau_pushbuf_validate(push);

   while (words) {
-      unsigned nr;
-
-      nr = PUSH_AVAIL(push);
-      nr = MIN2(nr - 7, words);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+      unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

+      PUSH_SPACE(push, nr + 7);
      BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
      PUSH_DATAh(push, bo->offset + base);
      PUSH_DATA (push, bo->offset + base);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -310,6 +310,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
      return 16; /* would be 32 in linked (OpenGL-style) mode */
   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
      return 16; /* XXX not sure if more are really safe */
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   default:
      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
      return 0;
@@ -535,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

-   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+   assert(PUSH_AVAIL(push) >= 5);
+   PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
   PUSH_DATA (push, *sequence);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
   nouveau_pushbuf_validate(push);

   while (count) {
-      unsigned nr;
+      unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);

-      if (!PUSH_SPACE(push, 16))
+      if (!PUSH_SPACE(push, nr + 9))
         break;
-      nr = PUSH_AVAIL(push);
-      assert(nr >= 16);
-      nr = MIN2(count, nr - 9);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);

      BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
      PUSH_DATAh(push, dst->offset + offset);
@@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
   nouveau_pushbuf_validate(push);

   while (count) {
-      unsigned nr;
+      unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1));

-      if (!PUSH_SPACE(push, 16))
+      if (!PUSH_SPACE(push, nr + 10))
         break;
-      nr = PUSH_AVAIL(push);
-      assert(nr >= 16);
-      nr = MIN2(count, nr - 8);
-      nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));

      BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, dst->offset + offset);
@@ -495,11 +487,16 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
         tx->rect[1].base += tx->nblocksy * tx->base.stride;
      }
      NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nvc0->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
   }
   if (tx->base.usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);

-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
   pipe_resource_reference(&transfer->resource, NULL);

   FREE(tx);
@@ -566,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv,
   PUSH_DATA (push, bo->offset + base);

   while (words) {
-      unsigned nr = PUSH_AVAIL(push);
-      nr = MIN2(nr, words);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+      unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1);

      PUSH_SPACE(push, nr + 2);
      PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -300,6 +300,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
            return 0;
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+            return 32;
        case PIPE_SHADER_CAP_PREFERRED_IR:
            return PIPE_SHADER_IR_TGSI;
        }
@@ -356,6 +358,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
            return 0;
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+            return 32;
        case PIPE_SHADER_CAP_PREFERRED_IR:
            return PIPE_SHADER_IR_TGSI;
        }
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -504,6 +504,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 		return 0;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		/* due to a bug in the shader compiler, some loops hang
+		 * if they are not unrolled, see:
+		 *    https://bugs.freedesktop.org/show_bug.cgi?id=86720
+		 */
+		return 255;
 	}
 	return 0;
 }
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -305,12 +305,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
 								ptransfer, data, staging, offset);
-			} else {
-				return NULL; /* error, shouldn't occur though */
 			}
+		} else {
+			/* At this point, the buffer is always idle (we checked it above). */
+			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 		}
-		/* At this point, the buffer is always idle (we checked it above). */
-		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 	}
 	/* Using a staging buffer in GTT for larger reads is much faster. */
 	else if ((usage & PIPE_TRANSFER_READ) &&
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -78,6 +78,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 	 * I guess the 4th one is derived from the first 3.
 	 * The vertex specification should match u_blitter's vertex element state. */
 	u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
+	if (!buf)
+		return;
+
 	vb[0] = x1;
 	vb[1] = y1;
 	vb[2] = depth;
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -989,6 +989,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,

 			if (usage & PIPE_TRANSFER_READ) {
 				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+				if (!temp) {
+					R600_ERR("failed to create a temporary depth texture\n");
+					FREE(trans);
+					return NULL;
+				}

 				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
 				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -25,6 +25,8 @@
 */
 #include "radeon_llvm_emit.h"
 #include "radeon_elf_util.h"
+#include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
 #include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"

@@ -86,30 +88,29 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)

 static void init_r600_target()
 {
-	static unsigned initialized = 0;
-	if (!initialized) {
+	gallivm_init_llvm_targets();
 #if HAVE_LLVM < 0x0307
-		LLVMInitializeR600TargetInfo();
-		LLVMInitializeR600Target();
-		LLVMInitializeR600TargetMC();
-		LLVMInitializeR600AsmPrinter();
+	LLVMInitializeR600TargetInfo();
+	LLVMInitializeR600Target();
+	LLVMInitializeR600TargetMC();
+	LLVMInitializeR600AsmPrinter();
 #else
-		LLVMInitializeAMDGPUTargetInfo();
-		LLVMInitializeAMDGPUTarget();
-		LLVMInitializeAMDGPUTargetMC();
-		LLVMInitializeAMDGPUAsmPrinter();
+	LLVMInitializeAMDGPUTargetInfo();
+	LLVMInitializeAMDGPUTarget();
+	LLVMInitializeAMDGPUTargetMC();
+	LLVMInitializeAMDGPUAsmPrinter();

 #endif
-		initialized = 1;
-	}
 }

+static once_flag init_r600_target_once_flag = ONCE_FLAG_INIT;
+
 LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
 {
 	LLVMTargetRef target = NULL;
 	char *err_message = NULL;

-	init_r600_target();
+	call_once(&init_r600_target_once_flag, init_r600_target);

 	if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
 		fprintf(stderr, "Cannot find target for triple %s ", triple);
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -233,6 +233,9 @@ static void vui(struct rvce_encoder *enc)
 {
 	int i;

+	if (!enc->pic.rate_ctrl.frame_rate_num)
+		return;
+
 	RVCE_BEGIN(0x04000009); // vui
 	RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
 	RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -468,7 +468,8 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf

 	u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
 		       (struct pipe_resource**)rbuffer, &tmp);
-	util_memcpy_cpu_to_le32(tmp, ptr, size);
+	if (rbuffer)
+		util_memcpy_cpu_to_le32(tmp, ptr, size);
 }

 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -500,6 +501,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 			si_upload_const_buffer(sctx,
 					       (struct r600_resource**)&buffer, input->user_buffer,
 					       input->buffer_size, &buffer_offset);
+			if (!buffer) {
+				/* Just unbind on failure. */
+				si_set_constant_buffer(ctx, shader, slot, NULL);
+				return;
+			}
 			va = r600_resource(buffer)->gpu_address + buffer_offset;
 		} else {
 			pipe_resource_reference(&buffer, input->buffer);
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -170,6 +170,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 	if (sctx->b.chip_class == CIK) {
 		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
 								 PIPE_USAGE_DEFAULT, 16);
+		if (!sctx->null_const_buf.buffer)
+			goto fail;
 		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
@@ -487,6 +489,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
 		return 1;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		return 32;
 	}
 	return 0;
 }
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3829,11 +3829,14 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
 	const struct radeon_shader_binary *binary = &shader->binary;
 	unsigned i;
+	int r;
 	bool dump  = r600_can_dump_shader(&sscreen->b,
 		shader->selector ? shader->selector->tokens : NULL);

 	si_shader_binary_read_config(sscreen, shader, 0);
-	si_shader_binary_upload(sscreen, shader);
+	r = si_shader_binary_upload(sscreen, shader);
+	if (r)
+		return r;

 	if (dump) {
 		if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
@@ -4198,8 +4201,10 @@ out:

 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
 {
-	if (shader->gs_copy_shader)
+	if (shader->gs_copy_shader) {
 		si_shader_destroy(ctx, shader->gs_copy_shader);
+		FREE(shader->gs_copy_shader);
+	}

 	if (shader->scratch_bo)
 		r600_resource_reference(&shader->scratch_bo, NULL);
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -274,7 +274,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      unsigned force_level);

 /* si_state_shader.c */
-void si_update_shaders(struct si_context *sctx);
+bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);

 /* si_state_draw.c */
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -760,8 +760,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	else
 		sctx->current_rast_prim = info->mode;

-	si_update_shaders(sctx);
-	if (!si_upload_shader_descriptors(sctx))
+	if (!si_update_shaders(sctx) ||
+	    !si_upload_shader_descriptors(sctx))
 		return;

 	if (info->indexed) {
@@ -783,6 +783,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)

 			u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
 				       &out_offset, &out_buffer, &ptr);
+			if (!out_buffer) {
+				pipe_resource_reference(&ib.buffer, NULL);
+				return;
+			}

 			util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0,
 							   ib.offset + start_offset,
@@ -803,6 +807,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 			u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
 				      (char*)ib.user_buffer + start_offset,
 				      &ib.offset, &ib.buffer);
+			if (!ib.buffer)
+				return;
 			/* info->start will be added by the drawing code */
 			ib.offset -= start_offset;
 		}
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -665,8 +665,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 	struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
 	int i;

+	if (!sel)
+		return NULL;
+
 	sel->type = pipe_shader_type;
 	sel->tokens = tgsi_dup_tokens(state->tokens);
+	if (!sel->tokens) {
+		FREE(sel);
+		return NULL;
+	}
+
 	sel->so = state->stream_output;
 	tgsi_scan_shader(state->tokens, &sel->info);
 	p_atomic_inc(&sscreen->b.num_shaders_created);
@@ -725,7 +733,12 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 	}

 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
-		si_shader_select(ctx, sel);
+		if (si_shader_select(ctx, sel)) {
+			fprintf(stderr, "radeonsi: can't create a shader\n");
+			tgsi_free_tokens(sel->tokens);
+			FREE(sel);
+			return NULL;
+		}

 	return sel;
 }
@@ -1031,11 +1044,23 @@ static void si_init_gs_rings(struct si_context *sctx)
 	assert(!sctx->gs_rings);
 	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);

+	if (!sctx->gs_rings)
+		return;
+
 	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 				       PIPE_USAGE_DEFAULT, esgs_ring_size);
+	if (!sctx->esgs_ring) {
+		FREE(sctx->gs_rings);
+		return;
+	}

 	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
+	if (!sctx->gsvs_ring) {
+		pipe_resource_reference(&sctx->esgs_ring, NULL);
+		FREE(sctx->gs_rings);
+		return;
+	}

 	if (sctx->b.chip_class >= CIK) {
 		if (sctx->b.chip_class >= VI) {
@@ -1094,14 +1119,16 @@ static void si_update_gs_rings(struct si_context *sctx)

 }
 /**
- * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
- *          otherwise.
+ * @returns 1 if \p sel has been updated to use a new scratch buffer
+ *          0 if not
+ *          < 0 if there was a failure
 */
-static unsigned si_update_scratch_buffer(struct si_context *sctx,
+static int si_update_scratch_buffer(struct si_context *sctx,
 				    struct si_shader_selector *sel)
 {
 	struct si_shader *shader;
 	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+	int r;

 	if (!sel)
 		return 0;
@@ -1122,7 +1149,9 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
 	si_shader_apply_scratch_relocs(sctx, shader, scratch_va);

 	/* Replace the shader bo with a new bo that has the relocs applied. */
-	si_shader_binary_upload(sctx->screen, shader);
+	r = si_shader_binary_upload(sctx->screen, shader);
+	if (r)
+		return r;

 	/* Update the shader state to use the new shader bo. */
 	si_shader_init_pm4_state(shader);
@@ -1161,7 +1190,7 @@ static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
 	return bytes;
 }

-static void si_update_spi_tmpring_size(struct si_context *sctx)
+static bool si_update_spi_tmpring_size(struct si_context *sctx)
 {
 	unsigned current_scratch_buffer_size =
 		si_get_current_scratch_buffer_size(sctx);
@@ -1169,6 +1198,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 		si_get_max_scratch_bytes_per_wave(sctx);
 	unsigned scratch_needed_size = scratch_bytes_per_wave *
 		sctx->scratch_waves;
+	int r;

 	if (scratch_needed_size > 0) {

@@ -1181,6 +1211,9 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 			sctx->scratch_buffer =
 					si_resource_create_custom(&sctx->screen->b.b,
 	                                PIPE_USAGE_DEFAULT, scratch_needed_size);
+			if (!sctx->scratch_buffer)
+				return false;
+			sctx->emit_scratch_reloc = true;
 		}

 		/* Update the shaders, so they are using the latest scratch.  The
@@ -1188,31 +1221,57 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 		 * last used, so we still need to try to update them, even if
 		 * they require scratch buffers smaller than the current size.
 		 */
-		if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+		r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
-		if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+
+		r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
-		if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+
+		r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);

 		/* VS can be bound as LS, ES, or VS. */
 		if (sctx->tes_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
 		} else if (sctx->gs_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 		} else {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		}

 		/* TES can be bound as ES or VS. */
 		if (sctx->gs_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+			r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
 		} else {
-			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+			r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
 		}
 	}
@@ -1223,6 +1282,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)

 	sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
 				S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+	return true;
 }

 static void si_init_tess_factor_ring(struct si_context *sctx)
@@ -1230,11 +1290,20 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	assert(!sctx->tf_state);
 	sctx->tf_state = CALLOC_STRUCT(si_pm4_state);

+	if (!sctx->tf_state)
+		return;
+
 	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					   PIPE_USAGE_DEFAULT,
 					   32768 * sctx->screen->b.info.max_se);
+	if (!sctx->tf_ring) {
+		FREE(sctx->tf_state);
+		return;
+	}
+
 	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
 			     sctx->tf_ring->width0, fui(0), false);
+
 	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);

 	if (sctx->b.chip_class >= CIK) {
@@ -1290,7 +1359,6 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)

 	sctx->fixed_func_tcs_shader =
 		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
-	assert(sctx->fixed_func_tcs_shader);
 }

 static void si_update_vgt_shader_config(struct si_context *sctx)
@@ -1338,32 +1406,49 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha
 	sctx->b.streamout.stride_in_dw = shader->so.stride;
 }

-void si_update_shaders(struct si_context *sctx)
+bool si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+	int r;

 	/* Update stages before GS. */
 	if (sctx->tes_shader) {
-		if (!sctx->tf_state)
+		if (!sctx->tf_state) {
 			si_init_tess_factor_ring(sctx);
+			if (!sctx->tf_state)
+				return false;
+		}

 		/* VS as LS */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);

 		if (sctx->tcs_shader) {
-			si_shader_select(ctx, sctx->tcs_shader);
+			r = si_shader_select(ctx, sctx->tcs_shader);
+			if (r)
+				return false;
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 		} else {
-			if (!sctx->fixed_func_tcs_shader)
+			if (!sctx->fixed_func_tcs_shader) {
 				si_generate_fixed_func_tcs(sctx);
-			si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+				if (!sctx->fixed_func_tcs_shader)
+					return false;
+			}
+
+			r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+			if (r)
+				return false;
 			si_pm4_bind_state(sctx, hs,
 					  sctx->fixed_func_tcs_shader->current->pm4);
 		}

-		si_shader_select(ctx, sctx->tes_shader);
+		r = si_shader_select(ctx, sctx->tes_shader);
+		if (r)
+			return false;
+
 		if (sctx->gs_shader) {
 			/* TES as ES */
 			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
@@ -1374,24 +1459,33 @@ void si_update_shaders(struct si_context *sctx)
 		}
 	} else if (sctx->gs_shader) {
 		/* VS as ES */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 	} else {
 		/* VS as VS */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		si_update_so(sctx, sctx->vs_shader);
 	}

 	/* Update GS. */
 	if (sctx->gs_shader) {
-		si_shader_select(ctx, sctx->gs_shader);
+		r = si_shader_select(ctx, sctx->gs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader);

-		if (!sctx->gs_rings)
+		if (!sctx->gs_rings) {
 			si_init_gs_rings(sctx);
+			if (!sctx->gs_rings)
+				return false;
+		}

 		if (sctx->emitted.named.gs_rings != sctx->gs_rings)
 			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
@@ -1406,18 +1500,9 @@ void si_update_shaders(struct si_context *sctx)

 	si_update_vgt_shader_config(sctx);

-	si_shader_select(ctx, sctx->ps_shader);
-
-	if (!sctx->ps_shader->current) {
-		struct si_shader_selector *sel;
-
-		/* use a dummy shader if compiling the shader (variant) failed */
-		si_make_dummy_ps(sctx);
-		sel = sctx->dummy_pixel_shader;
-		si_shader_select(ctx, sel);
-		sctx->ps_shader->current = sel->current;
-	}
-
+	r = si_shader_select(ctx, sctx->ps_shader);
+	if (r)
+		return false;
 	si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);

 	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
@@ -1428,9 +1513,14 @@ void si_update_shaders(struct si_context *sctx)
 		si_update_spi_map(sctx);
 	}

-	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
-	    si_pm4_state_changed(sctx, gs)) {
-		si_update_spi_tmpring_size(sctx);
+	if (si_pm4_state_changed(sctx, ls) ||
+	    si_pm4_state_changed(sctx, hs) ||
+	    si_pm4_state_changed(sctx, es) ||
+	    si_pm4_state_changed(sctx, gs) ||
+	    si_pm4_state_changed(sctx, vs) ||
+	    si_pm4_state_changed(sctx, ps)) {
+		if (!si_update_spi_tmpring_size(sctx))
+			return false;
 	}

 	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
@@ -1445,6 +1535,7 @@ void si_update_shaders(struct si_context *sctx)
 		if (sctx->b.chip_class == SI)
 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
+	return true;
 }

 void si_init_shader_functions(struct si_context *sctx)
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -383,6 +383,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      }
      /* If we get here, we failed to handle a cap above */
      debug_printf("Unexpected fragment shader query %u\n", param);
@@ -441,6 +443,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      }
      /* If we get here, we failed to handle a cap above */
      debug_printf("Unexpected vertex shader query %u\n", param);
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -334,6 +334,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
                return VC4_MAX_TEXTURE_SAMPLERS;
        case PIPE_SHADER_CAP_PREFERRED_IR:
                return PIPE_SHADER_IR_TGSI;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		return 32;
        default:
                fprintf(stderr, "unknown shader param %d\n", param);
                return 0;
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -674,7 +674,8 @@ enum pipe_shader_cap
   PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
   PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
   PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
-   PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
+   PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
+   PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
 };

 /**
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -188,10 +188,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
       * may occur as the stvis->color_format.
       */
      switch(format) {
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
+      case PIPE_FORMAT_BGRA8888_UNORM:
 	 depth = 32;
 	 break;
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_BGRX8888_UNORM:
 	 depth = 24;
 	 break;
      case PIPE_FORMAT_B5G6R5_UNORM:
@@ -261,13 +261,13 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable,
      case PIPE_FORMAT_B5G6R5_UNORM:
         image_format = __DRI_IMAGE_FORMAT_RGB565;
         break;
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_BGRX8888_UNORM:
         image_format = __DRI_IMAGE_FORMAT_XRGB8888;
         break;
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
+      case PIPE_FORMAT_BGRA8888_UNORM:
         image_format = __DRI_IMAGE_FORMAT_ARGB8888;
         break;
-      case PIPE_FORMAT_R8G8B8A8_UNORM:
+      case PIPE_FORMAT_RGBA8888_UNORM:
         image_format = __DRI_IMAGE_FORMAT_ABGR8888;
         break;
      default:
@@ -314,10 +314,10 @@ dri2_allocate_buffer(__DRIscreen *sPriv,

   switch (format) {
      case 32:
-         pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+         pf = PIPE_FORMAT_BGRA8888_UNORM;
         break;
      case 24:
-         pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+         pf = PIPE_FORMAT_BGRX8888_UNORM;
         break;
      case 16:
         pf = PIPE_FORMAT_Z16_UNORM;
@@ -724,13 +724,13 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
      pf = PIPE_FORMAT_B5G6R5_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_XRGB8888:
-      pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+      pf = PIPE_FORMAT_BGRX8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ARGB8888:
-      pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+      pf = PIPE_FORMAT_BGRA8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ABGR8888:
-      pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+      pf = PIPE_FORMAT_RGBA8888_UNORM;
      break;
   default:
      pf = PIPE_FORMAT_NONE;
@@ -845,13 +845,13 @@ dri2_create_image(__DRIscreen *_screen,
      pf = PIPE_FORMAT_B5G6R5_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_XRGB8888:
-      pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+      pf = PIPE_FORMAT_BGRX8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ARGB8888:
-      pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+      pf = PIPE_FORMAT_BGRA8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ABGR8888:
-      pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+      pf = PIPE_FORMAT_RGBA8888_UNORM;
      break;
   default:
      pf = PIPE_FORMAT_NONE;
@@ -1293,6 +1293,7 @@ dri2_load_opencl_interop(struct dri_screen *screen)
 }

 struct dri2_fence {
+   struct dri_screen *driscreen;
   struct pipe_fence_handle *pipe_fence;
   void *cl_event;
 };
@@ -1313,6 +1314,7 @@ dri2_create_fence(__DRIcontext *_ctx)
      return NULL;
   }

+   fence->driscreen = dri_screen(_ctx->driScreenPriv);
   return fence;
 }

@@ -1336,6 +1338,7 @@ dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event)
      return NULL;
   }

+   fence->driscreen = driscreen;
   return fence;
 }

@@ -1360,9 +1363,9 @@ static GLboolean
 dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
                      uint64_t timeout)
 {
-   struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv);
-   struct pipe_screen *screen = driscreen->base.screen;
   struct dri2_fence *fence = (struct dri2_fence*)_fence;
+   struct dri_screen *driscreen = fence->driscreen;
+   struct pipe_screen *screen = driscreen->base.screen;

   /* No need to flush. The context was flushed when the fence was created. */

--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -231,11 +231,11 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
      if (format == __DRI_TEXTURE_FORMAT_RGB)  {
         /* only need to cover the formats recognized by dri_fill_st_visual */
         switch (internal_format) {
-         case PIPE_FORMAT_B8G8R8A8_UNORM:
-            internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
+         case PIPE_FORMAT_BGRA8888_UNORM:
+            internal_format = PIPE_FORMAT_BGRX8888_UNORM;
            break;
-         case PIPE_FORMAT_A8R8G8B8_UNORM:
-            internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
+         case PIPE_FORMAT_ARGB8888_UNORM:
+            internal_format = PIPE_FORMAT_XRGB8888_UNORM;
            break;
         default:
            break;
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -753,10 +753,14 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
         priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
      }

-      priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
-      priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
-      if (!priv->picture.h264.field_pic_flag)
-         priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom;
+      if (!priv->picture.h264.field_pic_flag) {
+         priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+         priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] +
+                                          priv->codec_data.h264.delta_pic_order_cnt_bottom;
+      } else if (!priv->picture.h264.bottom_field_flag)
+         priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+      else
+         priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;

   } else if (sps->pic_order_cnt_type == 1) {
      unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -116,7 +116,7 @@ vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int heig
   img->width = width;
   img->height = height;
   w = align(width, 2);
-   h = align(width, 2);
+   h = align(height, 2);

   switch (format->fourcc) {
   case VA_FOURCC('N','V','1','2'):
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -35,7 +35,8 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
 	-lclangEdit \
 	-lclangLex \
 	-lclangBasic \
-	$(LLVM_LIBS)
+	$(LLVM_LIBS) \
+	$(PTHREAD_LIBS)

 nodist_EXTRA_lib@OPENCL_LIBNAME@_la_SOURCES = dummy.cpp
 lib@OPENCL_LIBNAME@_la_SOURCES =
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -62,6 +62,8 @@ public:
   virtual ir_rvalue *hir(exec_list *instructions,
 			  struct _mesa_glsl_parse_state *state);

+   virtual bool has_sequence_subexpression() const;
+
   /**
    * Retrieve the source location of an AST node
    *
@@ -221,6 +223,8 @@ public:
   virtual void hir_no_rvalue(exec_list *instructions,
                              struct _mesa_glsl_parse_state *state);

+   virtual bool has_sequence_subexpression() const;
+
   ir_rvalue *do_hir(exec_list *instructions,
                     struct _mesa_glsl_parse_state *state,
                     bool needs_rvalue);
@@ -299,6 +303,8 @@ public:
   virtual void hir_no_rvalue(exec_list *instructions,
                              struct _mesa_glsl_parse_state *state);

+   virtual bool has_sequence_subexpression() const;
+
 private:
   /**
    * Is this function call actually a constructor?
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -395,13 +395,54 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
      }
   }

-   /* If the function call is a constant expression, don't generate any
-    * instructions; just generate an ir_constant.
+   /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says:
    *
-    * Function calls were first allowed to be constant expressions in GLSL
-    * 1.20 and GLSL ES 3.00.
+    *     "Initializers for const declarations must be formed from literal
+    *     values, other const variables (not including function call
+    *     paramaters), or expressions of these.
+    *
+    *     Constructors may be used in such expressions, but function calls may
+    *     not."
+    *
+    * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says:
+    *
+    *     "A constant expression is one of
+    *
+    *         ...
+    *
+    *         - a built-in function call whose arguments are all constant
+    *           expressions, with the exception of the texture lookup
+    *           functions, the noise functions, and ftransform. The built-in
+    *           functions dFdx, dFdy, and fwidth must return 0 when evaluated
+    *           inside an initializer with an argument that is a constant
+    *           expression."
+    *
+    * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says:
+    *
+    *     "A constant expression is one of
+    *
+    *         ...
+    *
+    *         - a built-in function call whose arguments are all constant
+    *           expressions, with the exception of the texture lookup
+    *           functions."
+    *
+    * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says:
+    *
+    *     "A constant expression is one of
+    *
+    *         ...
+    *
+    *         - a built-in function call whose arguments are all constant
+    *           expressions, with the exception of the texture lookup
+    *           functions.  The built-in functions dFdx, dFdy, and fwidth must
+    *           return 0 when evaluated inside an initializer with an argument
+    *           that is a constant expression."
+    *
+    * If the function call is a constant expression, don't generate any
+    * instructions; just generate an ir_constant.
    */
-   if (state->is_version(120, 300)) {
+   if (state->is_version(120, 100)) {
      ir_constant *value = sig->constant_expression_value(actual_parameters, NULL);
      if (value != NULL) {
 	 return value;
@@ -1911,6 +1952,17 @@ ast_function_expression::hir(exec_list *instructions,
   unreachable("not reached");
 }

+bool
+ast_function_expression::has_sequence_subexpression() const
+{
+   foreach_list_typed(const ast_node, ast, link, &this->expressions) {
+      if (ast->has_sequence_subexpression())
+         return true;
+   }
+
+   return false;
+}
+
 ir_rvalue *
 ast_aggregate_initializer::hir(exec_list *instructions,
                               struct _mesa_glsl_parse_state *state)
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -939,6 +939,12 @@ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
   return NULL;
 }

+bool
+ast_node::has_sequence_subexpression() const
+{
+   return false;
+}
+
 void
 ast_function_expression::hir_no_rvalue(exec_list *instructions,
                                       struct _mesa_glsl_parse_state *state)
@@ -1850,6 +1856,80 @@ ast_expression::do_hir(exec_list *instructions,
   return result;
 }

+bool
+ast_expression::has_sequence_subexpression() const
+{
+   switch (this->oper) {
+   case ast_plus:
+   case ast_neg:
+   case ast_bit_not:
+   case ast_logic_not:
+   case ast_pre_inc:
+   case ast_pre_dec:
+   case ast_post_inc:
+   case ast_post_dec:
+      return this->subexpressions[0]->has_sequence_subexpression();
+
+   case ast_assign:
+   case ast_add:
+   case ast_sub:
+   case ast_mul:
+   case ast_div:
+   case ast_mod:
+   case ast_lshift:
+   case ast_rshift:
+   case ast_less:
+   case ast_greater:
+   case ast_lequal:
+   case ast_gequal:
+   case ast_nequal:
+   case ast_equal:
+   case ast_bit_and:
+   case ast_bit_xor:
+   case ast_bit_or:
+   case ast_logic_and:
+   case ast_logic_or:
+   case ast_logic_xor:
+   case ast_array_index:
+   case ast_mul_assign:
+   case ast_div_assign:
+   case ast_add_assign:
+   case ast_sub_assign:
+   case ast_mod_assign:
+   case ast_ls_assign:
+   case ast_rs_assign:
+   case ast_and_assign:
+   case ast_xor_assign:
+   case ast_or_assign:
+      return this->subexpressions[0]->has_sequence_subexpression() ||
+             this->subexpressions[1]->has_sequence_subexpression();
+
+   case ast_conditional:
+      return this->subexpressions[0]->has_sequence_subexpression() ||
+             this->subexpressions[1]->has_sequence_subexpression() ||
+             this->subexpressions[2]->has_sequence_subexpression();
+
+   case ast_sequence:
+      return true;
+
+   case ast_field_selection:
+   case ast_identifier:
+   case ast_int_constant:
+   case ast_uint_constant:
+   case ast_float_constant:
+   case ast_bool_constant:
+   case ast_double_constant:
+      return false;
+
+   case ast_aggregate:
+      unreachable("ast_aggregate: Should never get here.");
+
+   case ast_function_call:
+      unreachable("should be handled by ast_function_expression::hir");
+   }
+
+   return false;
+}

 ir_rvalue *
 ast_expression_statement::hir(exec_list *instructions,
@@ -3146,16 +3226,72 @@ process_initializer(ir_variable *var, ast_declaration *decl,

   /* Calculate the constant value if this is a const or uniform
    * declaration.
+    *
+    * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says:
+    *
+    *     "Declarations of globals without a storage qualifier, or with
+    *     just the const qualifier, may include initializers, in which case
+    *     they will be initialized before the first line of main() is
+    *     executed.  Such initializers must be a constant expression."
+    *
+    * The same section of the GLSL ES 3.00.4 spec has similar language.
    */
   if (type->qualifier.flags.q.constant
-       || type->qualifier.flags.q.uniform) {
+       || type->qualifier.flags.q.uniform
+       || (state->es_shader && state->current_function == NULL)) {
      ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
                                               lhs, rhs, true);
      if (new_rhs != NULL) {
         rhs = new_rhs;

+         /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec
+          * says:
+          *
+          *     "A constant expression is one of
+          *
+          *        ...
+          *
+          *        - an expression formed by an operator on operands that are
+          *          all constant expressions, including getting an element of
+          *          a constant array, or a field of a constant structure, or
+          *          components of a constant vector.  However, the sequence
+          *          operator ( , ) and the assignment operators ( =, +=, ...)
+          *          are not included in the operators that can create a
+          *          constant expression."
+          *
+          * Section 12.43 (Sequence operator and constant expressions) says:
+          *
+          *     "Should the following construct be allowed?
+          *
+          *         float a[2,3];
+          *
+          *     The expression within the brackets uses the sequence operator
+          *     (',') and returns the integer 3 so the construct is declaring
+          *     a single-dimensional array of size 3.  In some languages, the
+          *     construct declares a two-dimensional array.  It would be
+          *     preferable to make this construct illegal to avoid confusion.
+          *
+          *     One possibility is to change the definition of the sequence
+          *     operator so that it does not return a constant-expression and
+          *     hence cannot be used to declare an array size.
+          *
+          *     RESOLUTION: The result of a sequence operator is not a
+          *     constant-expression."
+          *
+          * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec
+          * contains language almost identical to the section 4.3.3 in the
+          * GLSL ES 3.00.4 spec.  This is a new limitation for these GLSL
+          * versions.
+          */
         ir_constant *constant_value = rhs->constant_expression_value();
-         if (!constant_value) {
+         if (!constant_value ||
+             (state->is_version(430, 300) &&
+              decl->initializer->has_sequence_subexpression())) {
+            const char *const variable_mode =
+               (type->qualifier.flags.q.constant)
+               ? "const"
+               : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
+
            /* If ARB_shading_language_420pack is enabled, initializers of
             * const-qualified local variables do not have to be constant
             * expressions. Const-qualified global variables must still be
@@ -3166,22 +3302,24 @@ process_initializer(ir_variable *var, ast_declaration *decl,
               _mesa_glsl_error(& initializer_loc, state,
                                "initializer of %s variable `%s' must be a "
                                "constant expression",
-                                (type->qualifier.flags.q.constant)
-                                ? "const" : "uniform",
+                                variable_mode,
                                decl->identifier);
               if (var->type->is_numeric()) {
                  /* Reduce cascading errors. */
-                  var->constant_value = ir_constant::zero(state, var->type);
+                  var->constant_value = type->qualifier.flags.q.constant
+                     ? ir_constant::zero(state, var->type) : NULL;
               }
            }
         } else {
            rhs = constant_value;
-            var->constant_value = constant_value;
+            var->constant_value = type->qualifier.flags.q.constant
+               ? constant_value : NULL;
         }
      } else {
         if (var->type->is_numeric()) {
            /* Reduce cascading errors. */
-            var->constant_value = ir_constant::zero(state, var->type);
+            var->constant_value = type->qualifier.flags.q.constant
+               ? ir_constant::zero(state, var->type) : NULL;
         }
      }
   }
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -673,14 +673,10 @@ builtin_variable_generator::generate_constants()
      if (!state->es_shader) {
         add_const("gl_MaxGeometryAtomicCounters",
                   state->Const.MaxGeometryAtomicCounters);
-
-	 if (state->is_version(400, 0) ||
-             state->ARB_tessellation_shader_enable) {
-		 add_const("gl_MaxTessControlAtomicCounters",
-                           state->Const.MaxTessControlAtomicCounters);
-		 add_const("gl_MaxTessEvaluationAtomicCounters",
-                           state->Const.MaxTessEvaluationAtomicCounters);
-	 }
+         add_const("gl_MaxTessControlAtomicCounters",
+                   state->Const.MaxTessControlAtomicCounters);
+         add_const("gl_MaxTessEvaluationAtomicCounters",
+                   state->Const.MaxTessEvaluationAtomicCounters);
      }
   }

--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -326,9 +326,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog,
            } else {
               assert(!"Explicit binding not on a sampler, UBO or atomic.");
            }
-         } else if (var->constant_value) {
+         } else if (var->constant_initializer) {
            linker::set_uniform_initializer(mem_ctx, prog, var->name,
-                                            var->type, var->constant_value,
+                                            var->type, var->constant_initializer,
                                            boolean_true);
         }
      }
--- a/src/glsl/opt_dead_code.cpp
+++ b/src/glsl/opt_dead_code.cpp
@@ -103,7 +103,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
 	  */
         if (entry->var->data.mode == ir_var_uniform ||
             entry->var->data.mode == ir_var_shader_storage) {
-            if (uniform_locations_assigned || entry->var->constant_value)
+            if (uniform_locations_assigned || entry->var->constant_initializer)
               continue;

            /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -46,6 +46,7 @@ AM_CFLAGS = \
 	$(EXTRA_DEFINES_XF86VIDMODE) \
 	-D_REENTRANT \
 	-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
+	-DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \
 	$(DEFINES) \
 	$(LIBDRM_CFLAGS) \
 	$(DRI2PROTO_CFLAGS) \
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -73,6 +73,10 @@ dri_message(int level, const char *f, ...)
   }
 }

+#ifndef GL_LIB_NAME
+#define GL_LIB_NAME "libGL.so.1"
+#endif
+
 #ifndef DEFAULT_DRIVER_DIR
 /* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
 #define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
@@ -99,7 +103,7 @@ driOpenDriver(const char *driverName)
   int len;

   /* Attempt to make sure libGL symbols will be visible to the driver */
-   glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL);
+   glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL);

   libPaths = NULL;
   if (geteuid() == getuid()) {
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -2646,7 +2646,11 @@ _X_EXPORT void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
 */
 _X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void)
 #if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
+# if defined(USE_MGL_NAMESPACE)
+   __attribute__ ((alias("mglXGetProcAddressARB")));
+# else
   __attribute__ ((alias("glXGetProcAddressARB")));
+# endif
 #else
 {
   return glXGetProcAddressARB(procName);
--- a/src/glx/glxextensions.h
+++ b/src/glx/glxextensions.h
@@ -281,11 +281,17 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char *name);
 # define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
 #else
 # if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
-#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
+ * extra expansion means that the name mangling macros in glx_mangle.h will
+ * apply before stringification, so the alias attribute will have a string like
+ * "mglXFoo" instead of "glXFoo". */
+#  define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
   return_type  real_func  proto_args                                   \
   __attribute__ ((alias( # aliased_func ) ));
+#  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+   GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
 #  define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
-   GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
+   GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
 # else
 #  define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
   return_type  real_func  proto_args                                   \
--- a/src/mapi/mapi_glapi.c
+++ b/src/mapi/mapi_glapi.c
@@ -175,7 +175,7 @@ _glapi_get_stub(const char *name, int generate)
   const struct mapi_stub *stub;

 #ifdef USE_MGL_NAMESPACE
-   if (name)
+   if (name && name[0] == 'm')
      name++;
 #endif

--- a/src/mesa/Android.libmesa_dricore.mk
+++ b/src/mesa/Android.libmesa_dricore.mk
@@ -50,7 +50,7 @@ endif # MESA_ENABLE_ASM
 ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
 LOCAL_SRC_FILES += \
 	main/streaming-load-memcpy.c \
-	mesa/main/sse_minmax.c
+	main/sse_minmax.c
 LOCAL_CFLAGS := \
 	-msse4.1 \
       -DUSE_SSE41
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -71,9 +71,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
   char *sample_map_str = rzalloc_size(mem_ctx, 1);
   char *sample_map_expr = rzalloc_size(mem_ctx, 1);
   char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
-   const char *vs_source;
   const char *sampler_array_suffix = "";
-   const char *texcoord_type = "vec2";
   float y_scale;
   enum blit_msaa_shader shader_index;

@@ -99,7 +97,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
      shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
                      BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
      sampler_array_suffix = "Array";
-      texcoord_type = "vec3";
   }

   if (blit->msaa_shaders[shader_index]) {
@@ -150,28 +147,37 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                          "   const int sample_map[%d] = int[%d](%s);\n",
                          samples, samples, sample_map_str);

-   ralloc_asprintf_append(&texel_fetch_macro,
-                          "#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n",
-                          texcoord_type, sample_number);
+   if (target == GL_TEXTURE_2D_MULTISAMPLE) {
+      ralloc_asprintf_append(&texel_fetch_macro,
+                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
+                             sample_number);
+   } else {
+      ralloc_asprintf_append(&texel_fetch_macro,
+                             "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
+                             sample_number);
+   }

-   vs_source = ralloc_asprintf(mem_ctx,
+   static const char vs_source[] =
                               "#version 130\n"
                               "in vec2 position;\n"
-                               "in %s textureCoords;\n"
-                               "out %s texCoords;\n"
+                               "in vec3 textureCoords;\n"
+                               "out vec2 texCoords;\n"
+                               "flat out int layer;\n"
                               "void main()\n"
                               "{\n"
-                               "   texCoords = textureCoords;\n"
+                               "   texCoords = textureCoords.xy;\n"
+                               "   layer = int(textureCoords.z);\n"
                               "   gl_Position = vec4(position, 0.0, 1.0);\n"
-                               "}\n",
-                               texcoord_type,
-                               texcoord_type);
+                               "}\n"
+      ;
+
   fs_source = ralloc_asprintf(mem_ctx,
                               "#version 130\n"
                               "#extension GL_ARB_texture_multisample : enable\n"
                               "uniform sampler2DMS%s texSampler;\n"
                               "uniform float src_width, src_height;\n"
-                               "in %s texCoords;\n"
+                               "in vec2 texCoords;\n"
+                               "flat in int layer;\n"
                               "out vec4 out_color;\n"
                               "\n"
                               "void main()\n"
@@ -212,7 +218,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
                               "   out_color = mix(x_0_color, x_1_color, interp.y);\n"
                               "}\n",
                               sampler_array_suffix,
-                               texcoord_type,
                               sample_map_expr,
                               y_scale,
                               1.0f / y_scale,
--- a/src/mesa/drivers/dri/i915/i830_context.h
+++ b/src/mesa/drivers/dri/i915/i830_context.h
@@ -42,10 +42,10 @@
 #define I830_UPLOAD_STIPPLE          0x4
 #define I830_UPLOAD_INVARIENT        0x8
 #define I830_UPLOAD_RASTER_RULES     0x10
-#define I830_UPLOAD_TEX(i)           (0x10<<(i))
-#define I830_UPLOAD_TEXBLEND(i)      (0x100<<(i))
-#define I830_UPLOAD_TEX_ALL          (0x0f0)
-#define I830_UPLOAD_TEXBLEND_ALL     (0xf00)
+#define I830_UPLOAD_TEX(i)           (0x0100<<(i))
+#define I830_UPLOAD_TEXBLEND(i)      (0x1000<<(i))
+#define I830_UPLOAD_TEX_ALL          (0x0f00)
+#define I830_UPLOAD_TEXBLEND_ALL     (0xf000)

 /* State structure offsets - these will probably disappear.
 */
--- a/src/mesa/drivers/dri/i915/i915_context.h
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -115,6 +115,8 @@ enum {
   I915_RASTER_RULES_SETUP_SIZE,
 };

+#define I915_TEX_UNITS 8
+
 #define I915_MAX_CONSTANT      32
 #define I915_CONSTANT_SIZE     (2+(4*I915_MAX_CONSTANT))

@@ -194,7 +196,8 @@ struct i915_fragment_program

   /* Helpers for i915_fragprog.c:
    */
-   GLuint wpos_tex;
+   uint8_t texcoord_mapping[I915_TEX_UNITS];
+   uint8_t wpos_tex;
   bool depth_written;

   struct
@@ -205,15 +208,6 @@ struct i915_fragment_program
   GLuint nr_params;
 };

-
-
-
-
-
-
-#define I915_TEX_UNITS 8
-
-
 struct i915_hw_state
 {
   GLuint Ctx[I915_CTX_SETUP_SIZE];
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -72,6 +72,22 @@ static const GLfloat cos_constants[4] = { 1.0,
   -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
 };

+/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */
+#define TEXCOORD_TEX (0<<7)
+#define TEXCOORD_VAR (1<<7)
+
+static unsigned
+get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
+{
+   for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+      if (p->texcoord_mapping[i] == texcoord)
+         return i;
+   }
+
+   /* blah */
+   return p->ctx->Const.MaxTextureCoordUnits - 1;
+}
+
 /**
 * Retrieve a ureg for the given source register.  Will emit
 * constants, apply swizzling and negation as needed.
@@ -82,6 +98,7 @@ src_vector(struct i915_fragment_program *p,
           const struct gl_fragment_program *program)
 {
   GLuint src;
+   unsigned unit;

   switch (source->File) {

@@ -119,8 +136,10 @@ src_vector(struct i915_fragment_program *p,
      case VARYING_SLOT_TEX5:
      case VARYING_SLOT_TEX6:
      case VARYING_SLOT_TEX7:
+         unit = get_texcoord_mapping(p, (source->Index -
+                                         VARYING_SLOT_TEX0) | TEXCOORD_TEX);
         src = i915_emit_decl(p, REG_TYPE_T,
-                              T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
+                              T_TEX0 + unit,
                              D0_CHANNEL_ALL);
 	 break;

@@ -132,8 +151,10 @@ src_vector(struct i915_fragment_program *p,
      case VARYING_SLOT_VAR0 + 5:
      case VARYING_SLOT_VAR0 + 6:
      case VARYING_SLOT_VAR0 + 7:
+         unit = get_texcoord_mapping(p, (source->Index -
+                                         VARYING_SLOT_VAR0) | TEXCOORD_VAR);
         src = i915_emit_decl(p, REG_TYPE_T,
-                              T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
+                              T_TEX0 + unit,
                              D0_CHANNEL_ALL);
         break;

@@ -1176,27 +1197,54 @@ fixup_depth_write(struct i915_fragment_program *p)
   }
 }

+static void
+check_texcoord_mapping(struct i915_fragment_program *p)
+{
+   GLbitfield64 inputs = p->FragProg.Base.InputsRead;
+   unsigned unit = 0;
+
+   for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+      if (inputs & VARYING_BIT_TEX(i)) {
+         if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
+            unit++;
+            break;
+         }
+         p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
+      }
+      if (inputs & VARYING_BIT_VAR(i)) {
+         if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
+            unit++;
+            break;
+         }
+         p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
+      }
+   }
+
+   if (unit > p->ctx->Const.MaxTextureCoordUnits)
+      i915_program_error(p, "Too many texcoord units");
+}

 static void
 check_wpos(struct i915_fragment_program *p)
 {
   GLbitfield64 inputs = p->FragProg.Base.InputsRead;
   GLint i;
+   unsigned unit = 0;

   p->wpos_tex = -1;

+   if ((inputs & VARYING_BIT_POS) == 0)
+      return;
+
   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
-      if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
-         continue;
-      else if (inputs & VARYING_BIT_POS) {
-         p->wpos_tex = i;
-         inputs &= ~VARYING_BIT_POS;
-      }
+      unit += !!(inputs & VARYING_BIT_TEX(i));
+      unit += !!(inputs & VARYING_BIT_VAR(i));
   }

-   if (inputs & VARYING_BIT_POS) {
+   if (unit < p->ctx->Const.MaxTextureCoordUnits)
+      p->wpos_tex = unit;
+   else
      i915_program_error(p, "No free texcoord for wpos value");
-   }
 }


@@ -1212,6 +1260,7 @@ translate_program(struct i915_fragment_program *p)
   }

   i915_init_program(i915, p);
+   check_texcoord_mapping(p);
   check_wpos(p);
   upload_program(p);
   fixup_depth_write(p);
@@ -1420,22 +1469,24 @@ i915ValidateFragmentProgram(struct i915_context *i915)

   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
      if (inputsRead & VARYING_BIT_TEX(i)) {
+         int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
         int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;

-         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
-      else if (inputsRead & VARYING_BIT_VAR(i)) {
+      if (inputsRead & VARYING_BIT_VAR(i)) {
+         int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
         int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;

-         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+         s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
-      else if (i == p->wpos_tex) {
+      if (i == p->wpos_tex) {
 	 int wpos_size = 4 * sizeof(float);
         /* If WPOS is required, duplicate the XYZ position data in an
          * unused texture coordinate:
--- a/src/mesa/drivers/dri/i915/intel_fbo.c
+++ b/src/mesa/drivers/dri/i915/intel_fbo.c
@@ -658,6 +658,11 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
 {
   struct intel_context *intel = intel_context(ctx);

+   /* Sync up the state of window system buffers.  We need to do this before
+    * we go looking for the buffers.
+    */
+   intel_prepare_render(intel);
+
   if (mask & GL_COLOR_BUFFER_BIT) {
      GLint i;
      struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1412,7 +1412,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
              buffer->cpp, buffer->pitch);
   }

-   intel_miptree_release(&rb->mt);
   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1556,7 +1556,10 @@ fs_visitor::assign_vs_urb_setup()

            inst->src[i].file = HW_REG;
            inst->src[i].fixed_hw_reg =
-               retype(brw_vec8_grf(grf, 0), inst->src[i].type);
+               stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
+                                  inst->src[i].subreg_offset),
+                      inst->exec_size * inst->src[i].stride,
+                      inst->exec_size, inst->src[i].stride);
         }
      }
   }
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -312,13 +312,43 @@ namespace {
   }

   namespace image_validity {
+      /**
+       * Check whether the bound image is suitable for untyped access.
+       */
+      brw_predicate
+      emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
+                               brw_predicate pred)
+      {
+         const brw_device_info *devinfo = bld.shader->devinfo;
+         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
+
+         if (devinfo->gen == 7 && !devinfo->is_haswell) {
+            /* Check whether the first stride component (i.e. the Bpp value)
+             * is greater than four, what on Gen7 indicates that a surface of
+             * type RAW has been bound for untyped access.  Reading or writing
+             * to a surface of type other than RAW using untyped surface
+             * messages causes a hang on IVB and VLV.
+             */
+            set_predicate(pred,
+                          bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
+                                  BRW_CONDITIONAL_G));
+
+            return BRW_PREDICATE_NORMAL;
+         } else {
+            /* More recent generations handle the format mismatch
+             * gracefully.
+             */
+            return pred;
+         }
+      }
+
      /**
       * Check whether there is an image bound at the given index and write
       * the comparison result to f0.0.  Returns an appropriate predication
       * mode to use on subsequent image operations.
       */
      brw_predicate
-      emit_surface_check(const fs_builder &bld, const fs_reg &image)
+      emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
      {
         const brw_device_info *devinfo = bld.shader->devinfo;
         const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
@@ -895,7 +925,9 @@ namespace brw {
             * surface read on the result,
             */
            const brw_predicate pred =
-               emit_bounds_check(bld, image, saddr, dims);
+               emit_untyped_image_check(bld, image,
+                                        emit_bounds_check(bld, image,
+                                                          saddr, dims));

            /* and they don't know about surface coordinates, we need to
             * convert them to a raw memory offset.
@@ -1041,7 +1073,9 @@ namespace brw {
                * the surface write on the result,
                */
               const brw_predicate pred =
-                  emit_bounds_check(bld, image, saddr, dims);
+                  emit_untyped_image_check(bld, image,
+                                           emit_bounds_check(bld, image,
+                                                             saddr, dims));

               /* and, phew, they don't know about surface coordinates, we
                * need to convert them to a raw memory offset.
@@ -1072,7 +1106,7 @@ namespace brw {
         using namespace image_coordinates;
         using namespace surface_access;
         /* Avoid performing an atomic operation on an unbound surface. */
-         const brw_predicate pred = emit_surface_check(bld, image);
+         const brw_predicate pred = emit_typed_atomic_check(bld, image);

         /* Transform the image coordinates into actual surface coordinates. */
         const fs_reg saddr =
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -129,7 +129,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
      ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];

   if (prog) {
-      /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+      /* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
                                &brw->gs.base, &brw->gs.prog_data->base.base);
   }
@@ -137,6 +137,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)

 const struct brw_tracked_state brw_gs_image_surfaces = {
   .dirty = {
+      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_GS_PROG_DATA |
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -61,6 +61,8 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type)
      this->swizzle = brw_swizzle_for_size(type->vector_elements);
   else
      this->swizzle = BRW_SWIZZLE_XYZW;
+   if (type)
+      this->type = brw_type_for_base_type(type);
 }

 /** Generic unset register constructor. */
@@ -1105,11 +1107,13 @@ vec4_visitor::opt_register_coalesce()
 	 if (interfered)
 	    break;

-         /* If somebody else writes our destination here, we can't coalesce
-          * before that.
+         /* If somebody else writes the same channels of our destination here,
+          * we can't coalesce before that.
          */
-         if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
-	    break;
+         if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
+             (inst->dst.writemask & scan_inst->dst.writemask) != 0) {
+            break;
+         }

         /* Check for reads of the register we're trying to coalesce into.  We
          * can't go rewriting instructions above that to put some other value
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -201,7 +201,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw)
      ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX];

   if (prog) {
-      /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+      /* BRW_NEW_VS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
                                &brw->vs.base, &brw->vs.prog_data->base.base);
   }
@@ -209,6 +209,7 @@ brw_upload_vs_image_surfaces(struct brw_context *brw)

 const struct brw_tracked_state brw_vs_image_surfaces = {
   .dirty = {
+      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_IMAGE_UNITS |
             BRW_NEW_VERTEX_PROGRAM |
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -34,6 +34,7 @@
 #include "main/blend.h"
 #include "main/mtypes.h"
 #include "main/samplerobj.h"
+#include "main/shaderimage.h"
 #include "program/prog_parameter.h"
 #include "main/framebuffer.h"

@@ -1033,7 +1034,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw)
      ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];

   if (prog) {
-      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+      /* BRW_NEW_CS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
                                &brw->cs.base, &brw->cs.prog_data->base);
   }
@@ -1041,7 +1042,7 @@ brw_upload_cs_image_surfaces(struct brw_context *brw)

 const struct brw_tracked_state brw_cs_image_surfaces = {
   .dirty = {
-      .mesa = _NEW_PROGRAM,
+      .mesa = _NEW_TEXTURE | _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_CS_PROG_DATA |
             BRW_NEW_IMAGE_UNITS
@@ -1174,7 +1175,7 @@ update_image_surface(struct brw_context *brw,
                     uint32_t *surf_offset,
                     struct brw_image_param *param)
 {
-   if (u->_Valid) {
+   if (_mesa_is_image_unit_valid(&brw->ctx, u)) {
      struct gl_texture_object *obj = u->TexObj;
      const unsigned format = get_image_format(brw, u->_ActualFormat, access);

@@ -1259,7 +1260,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (prog) {
-      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
+      /* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
      brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                                &brw->wm.base, &brw->wm.prog_data->base);
   }
@@ -1267,6 +1268,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)

 const struct brw_tracked_state brw_wm_image_surfaces = {
   .dirty = {
+      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_FRAGMENT_PROGRAM |
             BRW_NEW_FS_PROG_DATA |
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -59,9 +59,7 @@ upload_gs_state(struct brw_context *brw)
      OUT_BATCH(((ALIGN(stage_state->sampler_count, 4)/4) <<
                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
                ((brw->gs.prog_data->base.base.binding_table.size_bytes / 4) <<
-                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
-                (brw->is_haswell && prog_data->base.nr_image_params ?
-                 HSW_GS_UAV_ACCESS_ENABLE : 0));
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

      if (brw->gs.prog_data->base.base.total_scratch) {
         OUT_RELOC(stage_state->scratch_bo,
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -126,9 +126,7 @@ upload_vs_state(struct brw_context *brw)
 	     ((ALIGN(stage_state->sampler_count, 4)/4) <<
              GEN6_VS_SAMPLER_COUNT_SHIFT) |
             ((brw->vs.prog_data->base.base.binding_table.size_bytes / 4) <<
-              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
-             (brw->is_haswell && prog_data->base.nr_image_params ?
-              HSW_VS_UAV_ACCESS_ENABLE : 0));
+              GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

   if (prog_data->base.total_scratch) {
      OUT_RELOC(stage_state->scratch_bo,
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -113,7 +113,14 @@ upload_wm_state(struct brw_context *brw)
   else if (prog_data->base.nr_image_params)
      dw1 |= GEN7_WM_EARLY_DS_CONTROL_PSEXEC;

-   /* _NEW_BUFFERS | _NEW_COLOR */
+   /* The "UAV access enable" bits are unnecessary on HSW because they only
+    * seem to have an effect on the HW-assisted coherency mechanism which we
+    * don't need, and the rasterization-related UAV_ONLY flag and the
+    * DISPATCH_ENABLE bit can be set independently from it.
+    * C.f. gen8_upload_ps_extra().
+    *
+    * BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_FS_PROG_DATA | _NEW_BUFFERS | _NEW_COLOR
+    */
   if (brw->is_haswell &&
       !(brw_color_buffer_write_enabled(brw) || writes_depth) &&
       prog_data->base.nr_image_params)
@@ -221,9 +228,6 @@ gen7_upload_ps_state(struct brw_context *brw,
      _mesa_get_min_invocations_per_fragment(ctx, fp, false);
   assert(min_inv_per_frag >= 1);

-   if (brw->is_haswell && prog_data->base.nr_image_params)
-      dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
-
   if (prog_data->prog_offset_16 || prog_data->no_8) {
      dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
      if (!prog_data->no_8 && min_inv_per_frag == 1) {
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -52,9 +52,7 @@ gen8_upload_gs_state(struct brw_context *brw)
                ((ALIGN(stage_state->sampler_count, 4)/4) <<
                 GEN6_GS_SAMPLER_COUNT_SHIFT) |
                ((prog_data->base.binding_table.size_bytes / 4) <<
-                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
-                (prog_data->base.nr_image_params ?
-                 HSW_GS_UAV_ACCESS_ENABLE : 0));
+                 GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

      if (brw->gs.prog_data->base.base.total_scratch) {
         OUT_RELOC64(stage_state->scratch_bo,
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -25,6 +25,7 @@
 #include "program/program.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_wm.h"
 #include "intel_batchbuffer.h"

 void
@@ -61,8 +62,33 @@ gen8_upload_ps_extra(struct brw_context *brw,
   if (brw->gen >= 9 && prog_data->pulls_bary)
      dw1 |= GEN9_PSX_SHADER_PULLS_BARY;

-   if (_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
-       prog_data->base.nr_image_params)
+   /* The stricter cross-primitive coherency guarantees that the hardware
+    * gives us with the "Accesses UAV" bit set for at least one shader stage
+    * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
+    * redundant within the current image, atomic counter and SSBO GL APIs,
+    * which all have very loose ordering and coherency requirements and
+    * generally rely on the application to insert explicit barriers when a
+    * shader invocation is expected to see the memory writes performed by the
+    * invocations of some previous primitive.  Regardless of the value of "UAV
+    * coherency required", the "Accesses UAV" bits will implicitly cause an in
+    * most cases useless DC flush when the lowermost stage with the bit set
+    * finishes execution.
+    *
+    * It would be nice to disable it, but in some cases we can't because on
+    * Gen8+ it also has an influence on rasterization via the PS UAV-only
+    * signal (which could be set independently from the coherency mechanism in
+    * the 3DSTATE_WM command on Gen7), and because in some cases it will
+    * determine whether the hardware skips execution of the fragment shader or
+    * not via the ThreadDispatchEnable signal.  However if we know that
+    * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+    * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
+    * difference so we may just disable it here.
+    *
+    * BRW_NEW_FS_PROG_DATA | BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR
+    */
+   if ((_mesa_active_fragment_shader_has_atomic_ops(&brw->ctx) ||
+        prog_data->base.nr_image_params) &&
+       !brw_color_buffer_write_enabled(brw))
      dw1 |= GEN8_PSX_SHADER_HAS_UAV;

   BEGIN_BATCH(2);
@@ -87,7 +113,7 @@ upload_ps_extra(struct brw_context *brw)

 const struct brw_tracked_state gen8_ps_extra = {
   .dirty = {
-      .mesa  = 0,
+      .mesa  = _NEW_BUFFERS | _NEW_COLOR,
      .brw   = BRW_NEW_CONTEXT |
               BRW_NEW_FRAGMENT_PROGRAM |
               BRW_NEW_FS_PROG_DATA |
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -53,9 +53,7 @@ upload_vs_state(struct brw_context *brw)
             ((ALIGN(stage_state->sampler_count, 4) / 4) <<
               GEN6_VS_SAMPLER_COUNT_SHIFT) |
             ((prog_data->base.binding_table.size_bytes / 4) <<
-               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
-             (prog_data->base.nr_image_params ?
-              HSW_VS_UAV_ACCESS_ENABLE : 0));
+               GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));

   if (prog_data->base.total_scratch) {
      OUT_RELOC64(stage_state->scratch_bo,
--- a/src/mesa/main/dlist.c
+++ b/src/mesa/main/dlist.c
@@ -1401,7 +1401,7 @@ save_BlendFunci(GLuint buf, GLenum sfactor, GLenum dfactor)
   GET_CURRENT_CONTEXT(ctx);
   Node *n;
   ASSERT_OUTSIDE_SAVE_BEGIN_END_AND_FLUSH(ctx);
-   n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_SEPARATE_I, 3);
+   n = alloc_instruction(ctx, OPCODE_BLEND_FUNC_I, 3);
   if (n) {
      n[1].ui = buf;
      n[2].e = sfactor;
--- a/src/mesa/main/ff_fragment_shader.cpp
+++ b/src/mesa/main/ff_fragment_shader.cpp
@@ -976,13 +976,11 @@ static void load_texture( texenv_fragment_program *p, GLuint unit )
 						      ir_var_uniform);
   p->top_instructions->push_head(sampler);

-   /* Set the texture unit for this sampler.  The linker will pick this value
-    * up and do-the-right-thing.
-    *
-    * NOTE: The cast to int is important.  Without it, the constant will have
-    * type uint, and things later on may get confused.
+   /* Set the texture unit for this sampler in the same way that
+    * layout(binding=X) would.
    */
-   sampler->constant_value = new(p->mem_ctx) ir_constant(int(unit));
+   sampler->data.explicit_binding = true;
+   sampler->data.binding = unit;

   deref = new(p->mem_ctx) ir_dereference_variable(sampler);
   tex->set_sampler(deref, glsl_type::vec4_type);
--- a/src/mesa/main/ffvertex_prog.c
+++ b/src/mesa/main/ffvertex_prog.c
@@ -293,9 +293,10 @@ struct ureg {
   GLuint file:4;
   GLint idx:9;      /* relative addressing may be negative */
                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
+   GLuint abs:1;
   GLuint negate:1;
   GLuint swz:12;
-   GLuint pad:6;
+   GLuint pad:5;
 };


@@ -324,6 +325,7 @@ static const struct ureg undef = {
   0,
   0,
   0,
+   0,
   0
 };

@@ -342,6 +344,7 @@ static struct ureg make_ureg(GLuint file, GLint idx)
   struct ureg reg;
   reg.file = file;
   reg.idx = idx;
+   reg.abs = 0;
   reg.negate = 0;
   reg.swz = SWIZZLE_NOOP;
   reg.pad = 0;
@@ -350,6 +353,14 @@ static struct ureg make_ureg(GLuint file, GLint idx)



+static struct ureg absolute( struct ureg reg )
+{
+   reg.abs = 1;
+   reg.negate = 0;
+   return reg;
+}
+
+
 static struct ureg negate( struct ureg reg )
 {
   reg.negate ^= 1;
@@ -526,8 +537,8 @@ static void emit_arg( struct prog_src_register *src,
   src->File = reg.file;
   src->Index = reg.idx;
   src->Swizzle = reg.swz;
+   src->Abs = reg.abs;
   src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
-   src->Abs = 0;
   src->RelAddr = 0;
   /* Check that bitfield sizes aren't exceeded */
   assert(src->Index == reg.idx);
@@ -953,7 +964,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p,

      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
-      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
+      emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);

      release_temp(p, spot);
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2080,6 +2080,633 @@ _mesa_es_error_check_format_and_type(GLenum format, GLenum type,
   return type_valid ? GL_NO_ERROR : GL_INVALID_OPERATION;
 }

+/**
+ * Return the simple base format for a given internal texture format.
+ * For example, given GL_LUMINANCE12_ALPHA4, return GL_LUMINANCE_ALPHA.
+ *
+ * \param ctx GL context.
+ * \param internalFormat the internal texture format token or 1, 2, 3, or 4.
+ *
+ * \return the corresponding \u base internal format (GL_ALPHA, GL_LUMINANCE,
+ * GL_LUMANCE_ALPHA, GL_INTENSITY, GL_RGB, or GL_RGBA), or -1 if invalid enum.
+ *
+ * This is the format which is used during texture application (i.e. the
+ * texture format and env mode determine the arithmetic used.
+ */
+GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat)
+{
+   switch (internalFormat) {
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_ALPHA : -1;
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE : -1;
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_LUMINANCE_ALPHA : -1;
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      return (ctx->API != API_OPENGL_CORE) ? GL_INTENSITY : -1;
+   case 3:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGB : -1;
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return GL_RGB;
+   case 4:
+      return (ctx->API != API_OPENGL_CORE) ? GL_RGBA : -1;
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
+   }
+
+   /* GL_BGRA can be an internal format *only* in OpenGL ES (1.x or 2.0).
+    */
+   if (_mesa_is_gles(ctx)) {
+      switch (internalFormat) {
+      case GL_BGRA:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_ES2_compatibility) {
+      switch (internalFormat) {
+      case GL_RGB565:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_depth_texture) {
+      switch (internalFormat) {
+      case GL_DEPTH_COMPONENT:
+      case GL_DEPTH_COMPONENT16:
+      case GL_DEPTH_COMPONENT24:
+      case GL_DEPTH_COMPONENT32:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH_STENCIL:
+      case GL_DEPTH24_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_stencil8) {
+      switch (internalFormat) {
+      case GL_STENCIL_INDEX:
+      case GL_STENCIL_INDEX1:
+      case GL_STENCIL_INDEX4:
+      case GL_STENCIL_INDEX8:
+      case GL_STENCIL_INDEX16:
+         return GL_STENCIL_INDEX;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   switch (internalFormat) {
+   case GL_COMPRESSED_ALPHA:
+      return GL_ALPHA;
+   case GL_COMPRESSED_LUMINANCE:
+      return GL_LUMINANCE;
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return GL_LUMINANCE_ALPHA;
+   case GL_COMPRESSED_INTENSITY:
+      return GL_INTENSITY;
+   case GL_COMPRESSED_RGB:
+      return GL_RGB;
+   case GL_COMPRESSED_RGBA:
+      return GL_RGBA;
+   default:
+      ; /* fallthrough */
+   }
+
+   if (ctx->Extensions.TDFX_texture_compression_FXT1) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_FXT1_3DFX:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_FXT1_3DFX:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   /* Assume that the ANGLE flag will always be set if the EXT flag is set.
+    */
+   if (ctx->Extensions.ANGLE_texture_compression_dxt) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_desktop_gl(ctx)
+       && ctx->Extensions.ANGLE_texture_compression_dxt) {
+      switch (internalFormat) {
+      case GL_RGB_S3TC:
+      case GL_RGB4_S3TC:
+         return GL_RGB;
+      case GL_RGBA_S3TC:
+      case GL_RGBA4_S3TC:
+         return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.MESA_ycbcr_texture) {
+      if (internalFormat == GL_YCBCR_MESA)
+         return GL_YCBCR_MESA;
+   }
+
+   if (ctx->Extensions.ARB_texture_float) {
+      switch (internalFormat) {
+      case GL_ALPHA16F_ARB:
+      case GL_ALPHA32F_ARB:
+         return GL_ALPHA;
+      case GL_RGBA16F_ARB:
+      case GL_RGBA32F_ARB:
+         return GL_RGBA;
+      case GL_RGB16F_ARB:
+      case GL_RGB32F_ARB:
+         return GL_RGB;
+      case GL_INTENSITY16F_ARB:
+      case GL_INTENSITY32F_ARB:
+         return GL_INTENSITY;
+      case GL_LUMINANCE16F_ARB:
+      case GL_LUMINANCE32F_ARB:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA16F_ARB:
+      case GL_LUMINANCE_ALPHA32F_ARB:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_snorm) {
+      switch (internalFormat) {
+      case GL_RED_SNORM:
+      case GL_R8_SNORM:
+      case GL_R16_SNORM:
+         return GL_RED;
+      case GL_RG_SNORM:
+      case GL_RG8_SNORM:
+      case GL_RG16_SNORM:
+         return GL_RG;
+      case GL_RGB_SNORM:
+      case GL_RGB8_SNORM:
+      case GL_RGB16_SNORM:
+         return GL_RGB;
+      case GL_RGBA_SNORM:
+      case GL_RGBA8_SNORM:
+      case GL_RGBA16_SNORM:
+         return GL_RGBA;
+      case GL_ALPHA_SNORM:
+      case GL_ALPHA8_SNORM:
+      case GL_ALPHA16_SNORM:
+         return GL_ALPHA;
+      case GL_LUMINANCE_SNORM:
+      case GL_LUMINANCE8_SNORM:
+      case GL_LUMINANCE16_SNORM:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA_SNORM:
+      case GL_LUMINANCE8_ALPHA8_SNORM:
+      case GL_LUMINANCE16_ALPHA16_SNORM:
+         return GL_LUMINANCE_ALPHA;
+      case GL_INTENSITY_SNORM:
+      case GL_INTENSITY8_SNORM:
+      case GL_INTENSITY16_SNORM:
+         return GL_INTENSITY;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_sRGB) {
+      switch (internalFormat) {
+      case GL_SRGB_EXT:
+      case GL_SRGB8_EXT:
+      case GL_COMPRESSED_SRGB_EXT:
+         return GL_RGB;
+      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
+      case GL_SRGB_ALPHA_EXT:
+      case GL_SRGB8_ALPHA8_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_EXT:
+         return GL_RGBA;
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
+      case GL_SLUMINANCE_ALPHA_EXT:
+      case GL_SLUMINANCE8_ALPHA8_EXT:
+      case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+         return GL_LUMINANCE_ALPHA;
+      case GL_SLUMINANCE_EXT:
+      case GL_SLUMINANCE8_EXT:
+      case GL_COMPRESSED_SLUMINANCE_EXT:
+         return GL_LUMINANCE;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Version >= 30 ||
+       ctx->Extensions.EXT_texture_integer) {
+      switch (internalFormat) {
+      case GL_RGBA8UI_EXT:
+      case GL_RGBA16UI_EXT:
+      case GL_RGBA32UI_EXT:
+      case GL_RGBA8I_EXT:
+      case GL_RGBA16I_EXT:
+      case GL_RGBA32I_EXT:
+      case GL_RGB10_A2UI:
+         return GL_RGBA;
+      case GL_RGB8UI_EXT:
+      case GL_RGB16UI_EXT:
+      case GL_RGB32UI_EXT:
+      case GL_RGB8I_EXT:
+      case GL_RGB16I_EXT:
+      case GL_RGB32I_EXT:
+         return GL_RGB;
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_integer) {
+      switch (internalFormat) {
+      case GL_ALPHA8UI_EXT:
+      case GL_ALPHA16UI_EXT:
+      case GL_ALPHA32UI_EXT:
+      case GL_ALPHA8I_EXT:
+      case GL_ALPHA16I_EXT:
+      case GL_ALPHA32I_EXT:
+         return GL_ALPHA;
+      case GL_INTENSITY8UI_EXT:
+      case GL_INTENSITY16UI_EXT:
+      case GL_INTENSITY32UI_EXT:
+      case GL_INTENSITY8I_EXT:
+      case GL_INTENSITY16I_EXT:
+      case GL_INTENSITY32I_EXT:
+         return GL_INTENSITY;
+      case GL_LUMINANCE8UI_EXT:
+      case GL_LUMINANCE16UI_EXT:
+      case GL_LUMINANCE32UI_EXT:
+      case GL_LUMINANCE8I_EXT:
+      case GL_LUMINANCE16I_EXT:
+      case GL_LUMINANCE32I_EXT:
+         return GL_LUMINANCE;
+      case GL_LUMINANCE_ALPHA8UI_EXT:
+      case GL_LUMINANCE_ALPHA16UI_EXT:
+      case GL_LUMINANCE_ALPHA32UI_EXT:
+      case GL_LUMINANCE_ALPHA8I_EXT:
+      case GL_LUMINANCE_ALPHA16I_EXT:
+      case GL_LUMINANCE_ALPHA32I_EXT:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_rg) {
+      switch (internalFormat) {
+      case GL_R16F:
+      case GL_R32F:
+	 if (!ctx->Extensions.ARB_texture_float)
+	    break;
+         return GL_RED;
+      case GL_R8I:
+      case GL_R8UI:
+      case GL_R16I:
+      case GL_R16UI:
+      case GL_R32I:
+      case GL_R32UI:
+	 if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+	    break;
+	 /* FALLTHROUGH */
+      case GL_R8:
+      case GL_R16:
+      case GL_RED:
+      case GL_COMPRESSED_RED:
+         return GL_RED;
+
+      case GL_RG16F:
+      case GL_RG32F:
+	 if (!ctx->Extensions.ARB_texture_float)
+	    break;
+         return GL_RG;
+      case GL_RG8I:
+      case GL_RG8UI:
+      case GL_RG16I:
+      case GL_RG16UI:
+      case GL_RG32I:
+      case GL_RG32UI:
+	 if (ctx->Version < 30 && !ctx->Extensions.EXT_texture_integer)
+	    break;
+	 /* FALLTHROUGH */
+      case GL_RG:
+      case GL_RG8:
+      case GL_RG16:
+      case GL_COMPRESSED_RG:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_shared_exponent) {
+      switch (internalFormat) {
+      case GL_RGB9_E5_EXT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_packed_float) {
+      switch (internalFormat) {
+      case GL_R11F_G11F_B10F_EXT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_depth_buffer_float) {
+      switch (internalFormat) {
+      case GL_DEPTH_COMPONENT32F:
+         return GL_DEPTH_COMPONENT;
+      case GL_DEPTH32F_STENCIL8:
+         return GL_DEPTH_STENCIL;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ARB_texture_compression_rgtc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RED_RGTC1:
+      case GL_COMPRESSED_SIGNED_RED_RGTC1:
+         return GL_RED;
+      case GL_COMPRESSED_RG_RGTC2:
+      case GL_COMPRESSED_SIGNED_RG_RGTC2:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.EXT_texture_compression_latc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_LUMINANCE_LATC1_EXT:
+      case GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT:
+         return GL_LUMINANCE;
+      case GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT:
+      case GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.ATI_texture_compression_3dc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI:
+         return GL_LUMINANCE_ALPHA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->Extensions.OES_compressed_ETC1_RGB8_texture) {
+      switch (internalFormat) {
+      case GL_ETC1_RGB8_OES:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB8_ETC2:
+      case GL_COMPRESSED_SRGB8_ETC2:
+         return GL_RGB;
+      case GL_COMPRESSED_RGBA8_ETC2_EAC:
+      case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
+      case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+      case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
+         return GL_RGBA;
+      case GL_COMPRESSED_R11_EAC:
+      case GL_COMPRESSED_SIGNED_R11_EAC:
+         return GL_RED;
+      case GL_COMPRESSED_RG11_EAC:
+      case GL_COMPRESSED_SIGNED_RG11_EAC:
+         return GL_RG;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (_mesa_is_desktop_gl(ctx) &&
+       ctx->Extensions.ARB_texture_compression_bptc) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGBA_BPTC_UNORM:
+      case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+         return GL_RGBA;
+      case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+      case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+         return GL_RGB;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   if (ctx->API == API_OPENGLES) {
+      switch (internalFormat) {
+      case GL_PALETTE4_RGB8_OES:
+      case GL_PALETTE4_R5_G6_B5_OES:
+      case GL_PALETTE8_RGB8_OES:
+      case GL_PALETTE8_R5_G6_B5_OES:
+	 return GL_RGB;
+      case GL_PALETTE4_RGBA8_OES:
+      case GL_PALETTE8_RGB5_A1_OES:
+      case GL_PALETTE4_RGBA4_OES:
+      case GL_PALETTE4_RGB5_A1_OES:
+      case GL_PALETTE8_RGBA8_OES:
+      case GL_PALETTE8_RGBA4_OES:
+	 return GL_RGBA;
+      default:
+         ; /* fallthrough */
+      }
+   }
+
+   return -1; /* error */
+}
+
+/**
+ * Returns the effective internal format from a texture format and type.
+ * This is used by texture image operations internally for validation, when
+ * the specified internal format is a base (unsized) format.
+ *
+ * This method will only return a valid effective internal format if the
+ * combination of format, type and internal format in base form, is acceptable.
+ *
+ * If a single sized internal format is defined in the spec (OpenGL-ES 3.0.4) or
+ * in extensions, to unambiguously correspond to the given base format, then
+ * that internal format is returned as the effective. Otherwise, if the
+ * combination is accepted but a single effective format is not defined, the
+ * passed base format will be returned instead.
+ *
+ * \param format the texture format
+ * \param type the texture type
+ */
+static GLenum
+_mesa_es3_effective_internal_format_for_format_and_type(GLenum format,
+                                                        GLenum type)
+{
+   switch (type) {
+   case GL_UNSIGNED_BYTE:
+      switch (format) {
+      case GL_RGBA:
+         return GL_RGBA8;
+      case GL_RGB:
+         return GL_RGB8;
+      /* Although LUMINANCE_ALPHA, LUMINANCE and ALPHA appear in table 3.12,
+       * (section 3.8 Texturing, page 128 of the OpenGL-ES 3.0.4) as effective
+       * internal formats, they do not correspond to GL constants, so the base
+       * format is returned instead.
+       */
+      case GL_BGRA_EXT:
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+         return format;
+      }
+      break;
+
+   case GL_UNSIGNED_SHORT_4_4_4_4:
+      if (format == GL_RGBA)
+         return GL_RGBA4;
+      break;
+
+   case GL_UNSIGNED_SHORT_5_5_5_1:
+      if (format == GL_RGBA)
+         return GL_RGB5_A1;
+      break;
+
+   case GL_UNSIGNED_SHORT_5_6_5:
+      if (format == GL_RGB)
+         return GL_RGB565;
+      break;
+
+   /* OES_packed_depth_stencil */
+   case GL_UNSIGNED_INT_24_8:
+      if (format == GL_DEPTH_STENCIL)
+         return GL_DEPTH24_STENCIL8;
+      break;
+
+   case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
+      if (format == GL_DEPTH_STENCIL)
+         return GL_DEPTH32F_STENCIL8;
+      break;
+
+   case GL_UNSIGNED_SHORT:
+      if (format == GL_DEPTH_COMPONENT)
+         return GL_DEPTH_COMPONENT16;
+      break;
+
+   case GL_UNSIGNED_INT:
+      /* It can be DEPTH_COMPONENT16 or DEPTH_COMPONENT24, so just return
+       * the format.
+       */
+      if (format == GL_DEPTH_COMPONENT)
+         return format;
+      break;
+
+   /* OES_texture_float and OES_texture_half_float */
+   case GL_FLOAT:
+      if (format == GL_DEPTH_COMPONENT)
+         return GL_DEPTH_COMPONENT32F;
+      /* fall through */
+   case GL_HALF_FLOAT_OES:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+      case GL_LUMINANCE_ALPHA:
+      case GL_LUMINANCE:
+      case GL_ALPHA:
+      case GL_RED:
+      case GL_RG:
+         return format;
+      }
+      break;
+   case GL_HALF_FLOAT:
+      switch (format) {
+      case GL_RG:
+      case GL_RED:
+         return format;
+      }
+      break;
+
+   /* GL_EXT_texture_type_2_10_10_10_REV */
+   case GL_UNSIGNED_INT_2_10_10_10_REV:
+      switch (format) {
+      case GL_RGBA:
+      case GL_RGB:
+         return format;
+      }
+      break;
+
+   default:
+      /* fall through and return NONE */
+      break;
+   }
+
+   return GL_NONE;
+}

 /**
 * Do error checking of format/type combinations for OpenGL ES 3
@@ -2091,7 +2718,53 @@ _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
                                      GLenum format, GLenum type,
                                      GLenum internalFormat)
 {
+   /* If internalFormat is an unsized format, then the effective internal
+    * format derived from format and type should be used instead. Page 127,
+    * section "3.8 Texturing" of the GLES 3.0.4 spec states:
+    *
+    *    "if internalformat is a base internal format, the effective
+    *     internal format is a sized internal format that is derived
+    *     from the format and type for internal use by the GL.
+    *     Table 3.12 specifies the mapping of format and type to effective
+    *     internal formats. The effective internal format is used by the GL
+    *     for purposes such as texture completeness or type checks for
+    *     CopyTex* commands. In these cases, the GL is required to operate
+    *     as if the effective internal format was used as the internalformat
+    *     when specifying the texture data."
+    */
+   if (_mesa_is_enum_format_unsized(internalFormat)) {
+      GLenum effectiveInternalFormat =
+         _mesa_es3_effective_internal_format_for_format_and_type(format, type);
+
+      if (effectiveInternalFormat == GL_NONE)
+         return GL_INVALID_OPERATION;
+
+      GLenum baseInternalFormat;
+      if (internalFormat == GL_BGRA_EXT) {
+         /* Unfortunately, _mesa_base_tex_format returns a base format of
+          * GL_RGBA for GL_BGRA_EXT.  This makes perfect sense if you're
+          * asking the question, "what channels does this format have?"
+          * However, if we're trying to determine if two internal formats
+          * match in the ES3 sense, we actually want GL_BGRA.
+          */
+         baseInternalFormat = GL_BGRA_EXT;
+      } else {
+         baseInternalFormat =
+            _mesa_base_tex_format(ctx, effectiveInternalFormat);
+      }
+
+      if (internalFormat != baseInternalFormat)
+         return GL_INVALID_OPERATION;
+
+      internalFormat = effectiveInternalFormat;
+   }
+
   switch (format) {
+   case GL_BGRA_EXT:
+      if (type != GL_UNSIGNED_BYTE || internalFormat != GL_BGRA)
+         return GL_INVALID_OPERATION;
+      break;
+
   case GL_RGBA:
      switch (type) {
      case GL_UNSIGNED_BYTE:
--- a/src/mesa/main/glformats.h
+++ b/src/mesa/main/glformats.h
@@ -131,6 +131,8 @@ extern GLenum
 _mesa_es3_error_check_format_and_type(const struct gl_context *ctx,
                                      GLenum format, GLenum type,
                                      GLenum internalFormat);
+extern GLint
+_mesa_base_tex_format(const struct gl_context *ctx, GLint internalFormat );

 extern uint32_t
 _mesa_format_from_format_and_type(GLenum format, GLenum type);
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -1922,11 +1922,8 @@ generate_mipmap_uncompressed(struct gl_context *ctx, GLenum target,
      }

      /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         return;
-      }
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);

      if (target == GL_TEXTURE_1D_ARRAY) {
 	 srcDepth = srcHeight;
@@ -2110,7 +2107,19 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                         srcWidth, srcHeight, srcDepth,
                                         &dstWidth, &dstHeight, &dstDepth);
      if (!nextLevel)
-	 break;
+	 goto end;
+
+      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
+                                      dstWidth, dstHeight, dstDepth,
+                                      border, srcImage->InternalFormat,
+                                      srcImage->TexFormat)) {
+         /* all done */
+         goto end;
+      }
+
+      /* get dest gl_texture_image */
+      dstImage = _mesa_select_tex_image(texObj, target, level + 1);
+      assert(dstImage);

      /* Compute dst image strides and alloc memory on first iteration */
      temp_dst_row_stride = _mesa_format_row_stride(temp_format, dstWidth);
@@ -2124,13 +2133,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
 	 }
      }

-      /* get dest gl_texture_image */
-      dstImage = _mesa_get_tex_image(ctx, texObj, target, level + 1);
-      if (!dstImage) {
-         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
-         goto end;
-      }
-
      /* for 2D arrays, setup array[depth] of slice pointers */
      for (i = 0; i < srcDepth; i++) {
         temp_src_slices[i] = temp_src + temp_src_img_stride * i;
@@ -2149,14 +2151,6 @@ generate_mipmap_compressed(struct gl_context *ctx, GLenum target,
                                  dstWidth, dstHeight, dstDepth,
                                  temp_dst_slices, temp_dst_row_stride);

-      if (!_mesa_prepare_mipmap_level(ctx, texObj, level + 1,
-                                      dstWidth, dstHeight, dstDepth,
-                                      border, srcImage->InternalFormat,
-                                      srcImage->TexFormat)) {
-         /* all done */
-         goto end;
-      }
-
      /* The image space was allocated above so use glTexSubImage now */
      ctx->Driver.TexSubImage(ctx, 2, dstImage,
                              0, 0, 0, dstWidth, dstHeight, dstDepth,
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -4170,13 +4170,6 @@ struct gl_image_unit
    */
   GLboolean Layered;

-   /**
-    * GL_TRUE if the state of this image unit is valid and access from
-    * the shader is allowed.  Otherwise loads from this unit should
-    * return zero and stores should have no effect.
-    */
-   GLboolean _Valid;
-
   /**
    * Layer of the texture object bound to this unit as specified by the
    * application.
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -1074,6 +1074,21 @@ _mesa_pack_depth_span( struct gl_context *ctx, GLuint n, GLvoid *dest,
         }
      }
      break;
+   case GL_UNSIGNED_INT_24_8:
+      {
+         const GLdouble scale = (GLdouble) 0xffffff;
+         GLuint *dst = (GLuint *) dest;
+         GLuint i;
+         for (i = 0; i < n; i++) {
+            GLuint z = (GLuint) (depthSpan[i] * scale);
+            assert(z <= 0xffffff);
+            dst[i] = (z << 8);
+         }
+         if (dstPacking->SwapBytes) {
+            _mesa_swap4( (GLuint *) dst, n );
+         }
+         break;
+      }
   case GL_UNSIGNED_INT:
      {
         GLuint *dst = (GLuint *) dest;
--- a/src/mesa/main/program_resource.c
+++ b/src/mesa/main/program_resource.c
@@ -111,11 +111,9 @@ _mesa_GetProgramInterfaceiv(GLuint program, GLenum programInterface,
      for (i = 0, *params = 0; i < shProg->NumProgramResourceList; i++) {
         if (shProg->ProgramResourceList[i].Type != programInterface)
            continue;
-         const char *name =
-            _mesa_program_resource_name(&shProg->ProgramResourceList[i]);
-         unsigned array_size =
-            _mesa_program_resource_array_size(&shProg->ProgramResourceList[i]);
-         *params = MAX2(*params, strlen(name) + (array_size ? 3 : 0) + 1);
+         unsigned len =
+            _mesa_program_resource_name_len(&shProg->ProgramResourceList[i]);
+         *params = MAX2(*params, len + 1);
      }
      break;
   case GL_MAX_NUM_ACTIVE_VARIABLES:
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -476,7 +476,7 @@ _mesa_program_resource_array_size(struct gl_program_resource *res)
             RESOURCE_XFB(res)->Size : 0;
   case GL_PROGRAM_INPUT:
   case GL_PROGRAM_OUTPUT:
-      return RESOURCE_VAR(res)->data.max_array_access;
+      return RESOURCE_VAR(res)->type->length;
   case GL_UNIFORM:
   case GL_VERTEX_SUBROUTINE_UNIFORM:
   case GL_GEOMETRY_SUBROUTINE_UNIFORM:
@@ -661,6 +661,57 @@ _mesa_program_resource_find_index(struct gl_shader_program *shProg,
   return NULL;
 }

+/* Function returns if resource name is expected to have index
+ * appended into it.
+ *
+ *
+ * Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
+ * spec says:
+ *
+ *     "If the active uniform is an array, the uniform name returned in
+ *     name will always be the name of the uniform array appended with
+ *     "[0]"."
+ *
+ * The same text also appears in the OpenGL 4.2 spec.  It does not,
+ * however, appear in any previous spec.  Previous specifications are
+ * ambiguous in this regard.  However, either name can later be passed
+ * to glGetUniformLocation (and related APIs), so there shouldn't be any
+ * harm in always appending "[0]" to uniform array names.
+ *
+ * Geometry shader stage has different naming convention where the 'normal'
+ * condition is an array, therefore for variables referenced in geometry
+ * stage we do not add '[0]'.
+ *
+ * Note, that TCS outputs and TES inputs should not have index appended
+ * either.
+ */
+static bool
+add_index_to_name(struct gl_program_resource *res)
+{
+   bool add_index = !(((res->Type == GL_PROGRAM_INPUT) &&
+                       res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
+
+   /* Transform feedback varyings have array index already appended
+    * in their names.
+    */
+   if (res->Type == GL_TRANSFORM_FEEDBACK_VARYING)
+      add_index = false;
+
+   return add_index;
+}
+
+/* Get name length of a program resource. This consists of
+ * base name + 3 for '[0]' if resource is an array.
+ */
+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res)
+{
+   unsigned length = strlen(_mesa_program_resource_name(res));
+   if (_mesa_program_resource_array_size(res) && add_index_to_name(res))
+      length += 3;
+   return length;
+}
+
 /* Get full name of a program resource.
 */
 bool
@@ -696,36 +747,7 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,

   _mesa_copy_string(name, bufSize, length, _mesa_program_resource_name(res));

-   /* Page 61 (page 73 of the PDF) in section 2.11 of the OpenGL ES 3.0
-    * spec says:
-    *
-    *     "If the active uniform is an array, the uniform name returned in
-    *     name will always be the name of the uniform array appended with
-    *     "[0]"."
-    *
-    * The same text also appears in the OpenGL 4.2 spec.  It does not,
-    * however, appear in any previous spec.  Previous specifications are
-    * ambiguous in this regard.  However, either name can later be passed
-    * to glGetUniformLocation (and related APIs), so there shouldn't be any
-    * harm in always appending "[0]" to uniform array names.
-    *
-    * Geometry shader stage has different naming convention where the 'normal'
-    * condition is an array, therefore for variables referenced in geometry
-    * stage we do not add '[0]'.
-    *
-    * Note, that TCS outputs and TES inputs should not have index appended
-    * either.
-    */
-   bool add_index = !(((programInterface == GL_PROGRAM_INPUT) &&
-                       res->StageReferences & (1 << MESA_SHADER_GEOMETRY)));
-
-   /* Transform feedback varyings have array index already appended
-    * in their names.
-    */
-   if (programInterface == GL_TRANSFORM_FEEDBACK_VARYING)
-      add_index = false;
-
-   if (add_index && _mesa_program_resource_array_size(res)) {
+   if (_mesa_program_resource_array_size(res) && add_index_to_name(res)) {
      int i;

      /* The comparison is strange because *length does *NOT* include the
@@ -972,13 +994,9 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
      switch (res->Type) {
      case GL_ATOMIC_COUNTER_BUFFER:
         goto invalid_operation;
-      case GL_TRANSFORM_FEEDBACK_VARYING:
-         *val = strlen(_mesa_program_resource_name(res)) + 1;
-         break;
      default:
-         /* Base name +3 if array '[0]' + terminator. */
-         *val = strlen(_mesa_program_resource_name(res)) +
-            (_mesa_program_resource_array_size(res) > 0 ? 3 : 0) + 1;
+         /* Resource name length + terminator. */
+         *val = _mesa_program_resource_name_len(res) + 1;
      }
      return 1;
   case GL_TYPE:
@@ -1003,7 +1021,7 @@ _mesa_program_resource_prop(struct gl_shader_program *shProg,
            return 1;
      case GL_PROGRAM_INPUT:
      case GL_PROGRAM_OUTPUT:
-         *val = MAX2(RESOURCE_VAR(res)->type->length, 1);
+         *val = MAX2(_mesa_program_resource_array_size(res), 1);
         return 1;
      case GL_TRANSFORM_FEEDBACK_VARYING:
         *val = MAX2(RESOURCE_XFB(res)->Size, 1);
--- a/src/mesa/main/shaderapi.h
+++ b/src/mesa/main/shaderapi.h
@@ -245,6 +245,9 @@ _mesa_get_program_resource_name(struct gl_shader_program *shProg,
                                GLsizei bufSize, GLsizei *length,
                                GLchar *name, const char *caller);

+extern unsigned
+_mesa_program_resource_name_len(struct gl_program_resource *res);
+
 extern GLint
 _mesa_program_resource_location(struct gl_shader_program *shProg,
                                GLenum programInterface, const char *name);
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -415,8 +415,8 @@ _mesa_init_image_units(struct gl_context *ctx)
      ctx->ImageUnits[i] = _mesa_default_image_unit(ctx);
 }

-static GLboolean
-validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
+GLboolean
+_mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u)
 {
   struct gl_texture_object *t = u->TexObj;
   mesa_format tex_format;
@@ -424,7 +424,8 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
   if (!t)
      return GL_FALSE;

-   _mesa_test_texobj_completeness(ctx, t);
+   if (!t->_BaseComplete && !t->_MipmapComplete)
+       _mesa_test_texobj_completeness(ctx, t);

   if (u->Level < t->BaseLevel ||
       u->Level > t->_MaxLevel ||
@@ -473,17 +474,6 @@ validate_image_unit(struct gl_context *ctx, struct gl_image_unit *u)
   return GL_TRUE;
 }

-void
-_mesa_validate_image_units(struct gl_context *ctx)
-{
-   unsigned i;
-
-   for (i = 0; i < ctx->Const.MaxImageUnits; ++i) {
-      struct gl_image_unit *u = &ctx->ImageUnits[i];
-      u->_Valid = validate_image_unit(ctx, u);
-   }
-}
-
 static GLboolean
 validate_bind_image_texture(struct gl_context *ctx, GLuint unit,
                            GLuint texture, GLint level, GLboolean layered,
@@ -567,7 +557,6 @@ _mesa_BindImageTexture(GLuint unit, GLuint texture, GLint level,
   u->Access = access;
   u->Format = format;
   u->_ActualFormat = _mesa_get_shader_image_format(format);
-   u->_Valid = validate_image_unit(ctx, u);

   if (u->TexObj && _mesa_tex_target_is_layered(u->TexObj->Target)) {
      u->Layered = layered;
@@ -707,7 +696,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
         u->Access = GL_READ_WRITE;
         u->Format = tex_format;
         u->_ActualFormat = _mesa_get_shader_image_format(tex_format);
-         u->_Valid = validate_image_unit(ctx, u);
      } else {
         /* Unbind the texture from the unit */
         _mesa_reference_texobj(&u->TexObj, NULL);
@@ -717,7 +705,6 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures)
         u->Access = GL_READ_ONLY;
         u->Format = GL_R8;
         u->_ActualFormat = MESA_FORMAT_R_UNORM8;
-         u->_Valid = GL_FALSE;
      }

      /* Pass the BindImageTexture call down to the device driver */
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .0.1
 .0.4