docs: add release notes for 11.0.6

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Update version to 11.0.6
2015-11-21 11:43:55 +00:00 · 2015-11-21 11:42:52 +00:00 · 2015-11-21 11:42:52 +00:00 · 2015-11-18 19:13:17 +00:00 · 2015-11-18 18:59:34 +00:00 · 2015-11-18 18:59:20 +00:00
159 changed files with 3279 additions and 585 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -32,6 +32,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
+	--disable-llvm-shared-libs \
 	--with-egl-platforms=x11,wayland,drm \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
 	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.2
+11.0.6
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,5 @@
+# The commit base differs greatly between 11.0 and master
+2832ca95ecce064c7d841a3a374c2179f56161be glsl: fix stream qualifier for blocks with an instance name
+
+# Somewhat of a mixed feature/bugfix patch, causing some 200 piglit regressions
+2b676570960277d47477822ffeccc672613f9142 gallium/swrast: fix front buffer blitting. (v2)
--- a/configure.ac
+++ b/configure.ac
@@ -106,6 +106,8 @@ AC_SYS_LARGEFILE
 LT_PREREQ([2.2])
 LT_INIT([disable-static])

+AC_CHECK_PROG(RM, rm, [rm -f])
+
 AX_PROG_BISON([],
              AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
                    [AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
--- a/docs/relnotes/11.0.2.html
+++ b/docs/relnotes/11.0.2.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20  mesa-11.0.2.tar.gz
+fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4  mesa-11.0.2.tar.xz
 </pre>


--- a/docs/relnotes/11.0.3.html
+++ b/docs/relnotes/11.0.3.html
@@ -0,0 +1,185 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.3 Release Notes / October 10, 2015</h1>
+
+<p>
+Mesa 11.0.3 is a bug fix release which fixes bugs found since the 11.0.2 release.
+</p>
+<p>
+Mesa 11.0.3 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c2210e3daecc10ed9fdcea500327652ed6effc2f47c4b9cee63fb08f560d7117  mesa-11.0.3.tar.gz
+ab2992eece21adc23c398720ef8c6933cb69ea42e1b2611dc09d031e17e033d6  mesa-11.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71789">Bug 71789</a> - [r300g] Visuals not found in (default) depth = 24</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91044">Bug 91044</a> - piglit spec/egl_khr_create_context/valid debug flag gles* fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91718">Bug 91718</a> - piglit.spec.arb_shader_image_load_store.invalid causes intermittent GPU HANG</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92265">Bug 92265</a> - Black windows in weston after update mesa to 11.0.2-1</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: Add abs input modifier to base for POW in ffvertex_prog</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.2</li>
+  <li>Revert "nouveau: make sure there's always room to emit a fence"</li>
+  <li>Update version to 11.0.3</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>i965/fs: Fix hang on IVB and VLV with image format mismatch.</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>meta: Handle array textures in scaled MSAA blits</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>nouveau: be more careful about freeing temporary transfer buffers</li>
+  <li>nouveau: delay deleting buffer with unflushed fence</li>
+  <li>nouveau: wait to unref the transfer's bo until it's no longer used</li>
+  <li>nv30: pretend to have packed texture/surface formats</li>
+  <li>nv30: always go through translate module on big-endian</li>
+  <li>nouveau: make sure there's always room to emit a fence</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>mesa: Correctly handle GL_BGRA_EXT in ES3 format_and_type checks</li>
+</ul>
+
+<p>Kyle Brenneman (3):</p>
+<ul>
+  <li>glx: Fix build errors with --enable-mangling (v2)</li>
+  <li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
+  <li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: fix vui time_scale zero error</li>
+</ul>
+
+<p>Marek Olšák (21):</p>
+<ul>
+  <li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
+  <li>radeonsi: handle index buffer alloc failures</li>
+  <li>radeonsi: handle constant buffer alloc failures</li>
+  <li>gallium/radeon: handle buffer_map staging buffer failures better</li>
+  <li>gallium/radeon: handle buffer alloc failures in r600_draw_rectangle</li>
+  <li>gallium/radeon: add a fail path for depth MSAA texture readback</li>
+  <li>radeonsi: report alloc failure from si_shader_binary_read</li>
+  <li>radeonsi: add malloc fail paths to si_create_shader_state</li>
+  <li>radeonsi: skip drawing if the tess factor ring allocation fails</li>
+  <li>radeonsi: skip drawing if GS ring allocations fail</li>
+  <li>radeonsi: handle shader precompile failures</li>
+  <li>radeonsi: handle fixed-func TCS shader create failure</li>
+  <li>radeonsi: skip drawing if VS, TCS, TES, GS fail to compile or upload</li>
+  <li>radeonsi: skip drawing if PS fails to compile or upload</li>
+  <li>radeonsi: skip drawing if updating the scratch buffer fails</li>
+  <li>radeonsi: don't forget to update scratch relocations for LS, HS, ES shaders</li>
+  <li>radeonsi: handle dummy constant buffer allocation failure</li>
+  <li>gallium/u_blitter: handle allocation failures</li>
+  <li>radeonsi: add scratch buffer to the buffer list when it's re-allocated</li>
+  <li>st/dri: don't use _ctx in client_wait_sync</li>
+  <li>egl/dri2: don't require a context for ClientWaitSync (v2)</li>
+</ul>
+
+<p>Matthew Waters (1):</p>
+<ul>
+  <li>egl: rework handling EGL_CONTEXT_FLAGS</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>st/dri: Use packed RGB formats</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>mesa: fix mipmap generation for immutable, compressed textures</li>
+</ul>
+
+<p>Tom Stellard (3):</p>
+<ul>
+  <li>gallium/radeon: Use call_once() when initailizing LLVM targets</li>
+  <li>gallivm: Allow drivers and state trackers to initialize gallivm LLVM targets v2</li>
+  <li>radeon/llvm: Initialize gallivm targets when initializing the AMDGPU target v2</li>
+</ul>
+
+<p>Varad Gautam (1):</p>
+<ul>
+  <li>egl: restore surface type before linking config to its display</li>
+</ul>
+
+<p>Ville Syrjälä (3):</p>
+<ul>
+  <li>i830: Fix collision between I830_UPLOAD_RASTER_RULES and I830_UPLOAD_TEX(0)</li>
+  <li>i915: Fix texcoord vs. varying collision in fragment programs</li>
+  <li>i915: Remember to call intel_prepare_render() before blitting</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.4.html
+++ b/docs/relnotes/11.0.4.html
@@ -0,0 +1,168 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.4 Release Notes / October 24, 2015</h1>
+
+<p>
+Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release.
+</p>
+<p>
+Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+ed412ca6a46d1bd055120e5c12806c15419ae8c4dd6d3f6ea20a83091d5c78bf  mesa-11.0.4.tar.gz
+40201bf7fc6fa12a6d9edfe870b41eb4dd6669154e3c42c48a96f70805f5483d  mesa-11.0.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (2):</p>
+<ul>
+  <li>i965/vec4: check writemask when bailing out at register coalesce</li>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Brian Paul (2):</p>
+<ul>
+  <li>vbo: fix incorrect switch statement in init_mat_currval()</li>
+  <li>mesa: fix incorrect opcode in save_BlendFunci()</li>
+</ul>
+
+<p>Chih-Wei Huang (3):</p>
+<ul>
+  <li>mesa: android: Fix the incorrect path of sse_minmax.c</li>
+  <li>nv50/ir: use C++11 standard std::unordered_map if possible</li>
+  <li>nv30: include the header of ffs prototype</li>
+</ul>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>i965: Remove early release of DRI2 miptree</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/uniforms: fix get_uniform for doubles (v2)</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.3</li>
+</ul>
+
+<p>Francisco Jerez (5):</p>
+<ul>
+  <li>i965: Don't tell the hardware about our UAV access.</li>
+  <li>mesa: Expose function to calculate whether a shader image unit is valid.</li>
+  <li>mesa: Skip redundant texture completeness checking during image validation.</li>
+  <li>i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.</li>
+  <li>mesa: Get rid of texture-dependent image unit derived state.</li>
+</ul>
+
+<p>Ian Romanick (8):</p>
+<ul>
+  <li>glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00</li>
+  <li>ff_fragment_shader: Use binding to set the sampler unit</li>
+  <li>glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms</li>
+  <li>glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform</li>
+  <li>glsl: Only set ir_variable::constant_value for const-decorated variables</li>
+  <li>glsl: Restrict initializers for global variables to constant expression in ES</li>
+  <li>glsl: Add method to determine whether an expression contains the sequence operator</li>
+  <li>glsl: In later GLSL versions, sequence operator is cannot be a constant expression</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nouveau: make sure there's always room to emit a fence</li>
+</ul>
+
+<p>Indrajit Das (1):</p>
+<ul>
+  <li>st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage</li>
+</ul>
+
+<p>Jonathan Gray (1):</p>
+<ul>
+  <li>configure.ac: ensure RM is set</li>
+</ul>
+
+<p>Krzysztof Sobiecki (1):</p>
+<ul>
+  <li>st/fbo: use pipe_surface_release instead of pipe_surface_reference</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>st/omx/dec/h264: fix field picture type 0 poc disorder</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>st/mesa: fix clip state dependencies</li>
+  <li>radeonsi: fix a GS copy shader leak</li>
+  <li>gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>u_vbuf: fix vb slot assignment for translated buffers</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno/a3xx: cache-flush is needed after MEM_WRITE</li>
+</ul>
+
+<p>Tapani Pälli (3):</p>
+<ul>
+  <li>mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span</li>
+  <li>mesa: Set api prefix to version string when overriding version</li>
+  <li>mesa: fix ARRAY_SIZE query for GetProgramResourceiv</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.5.html
+++ b/docs/relnotes/11.0.5.html
@@ -0,0 +1,174 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.5 Release Notes / November 11, 2015</h1>
+
+<p>
+Mesa 11.0.5 is a bug fix release which fixes bugs found since the 11.0.4 release.
+</p>
+<p>
+Mesa 11.0.5 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+8495ef5c06f7f726452462b7d408a5b40048373ff908f2283a3b4d1f49b45ee6  mesa-11.0.5.tar.gz
+9c255a2a6695fcc6ef4a279e1df0aeaf417dc142f39ee59dfb533d80494bb67a  mesa-11.0.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91993">Bug 91993</a> - Graphical glitch in Astromenace (open-source game).</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92214">Bug 92214</a> - Flightgear crashes during splashboot with R600 driver, LLVM 3.7.0 and mesa 11.0.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92437">Bug 92437</a> - osmesa: Expose GL entry points for Windows build, via .def file</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92476">Bug 92476</a> - [cts] ES2-CTS.gtf.GL2ExtensionTests.egl_image.egl_image fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92623">Bug 92623</a> - Differences in prog_data ignored when caching fragment programs (causes hangs)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeon/uvd: don't expose HEVC on old UVD hw (v3)</li>
+</ul>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965/skl: Add GT4 PCI IDs</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.4</li>
+  <li>cherry-ignore: ignore a possible wrong nomination</li>
+  <li>Revert "mesa/glformats: Undo code changes from _mesa_base_tex_format() move"</li>
+  <li>Update version to 11.0.5</li>
+</ul>
+
+<p>Emmanuel Gil Peyrot (1):</p>
+<ul>
+  <li>gbm.h: Add a missing stddef.h include for size_t.</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: When the create ioctl fails, free our cache and try again.</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>i965: Fix is-renderable check in intel_image_target_renderbuffer_storage</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>nvc0: respect edgeflag attribute width</li>
+  <li>nouveau: set MaxDrawBuffers to the same value as MaxColorAttachments</li>
+  <li>nouveau: relax fence emit space assert</li>
+</ul>
+
+<p>Ivan Kalvachev (1):</p>
+<ul>
+  <li>r600g: Fix special negative immediate constants when using ABS modifier.</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir/lower_vec_to_movs: Pass the shader around directly</li>
+  <li>nir: Report progress from lower_vec_to_movs().</li>
+</ul>
+
+<p>Jose Fonseca (2):</p>
+<ul>
+  <li>gallivm: Translate all util_cpu_caps bits to LLVM attributes.</li>
+  <li>gallivm: Explicitly disable unsupported CPU features.</li>
+</ul>
+
+<p>Julien Isorce (4):</p>
+<ul>
+  <li>st/va: pass picture desc to begin and decode</li>
+  <li>nvc0: fix crash when nv50_miptree_from_handle fails</li>
+  <li>st/va: do not destroy old buffer when new one failed</li>
+  <li>st/va: add more errors checks in vlVaBufferSetNumElements and vlVaMapBuffer</li>
+</ul>
+
+<p>Kenneth Graunke (6):</p>
+<ul>
+  <li>i965: Fix missing BRW_NEW_*_PROG_DATA flagging caused by cache reuse.</li>
+  <li>nir: Report progress from nir_split_var_copies().</li>
+  <li>nir: Properly invalidate metadata in nir_split_var_copies().</li>
+  <li>nir: Properly invalidate metadata in nir_opt_copy_prop().</li>
+  <li>nir: Properly invalidate metadata in nir_lower_vec_to_movs().</li>
+  <li>nir: Properly invalidate metadata in nir_opt_remove_phis().</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: add register definitions for Stoney</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/glformats: Undo code changes from _mesa_base_tex_format() move</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>st/mesa: fix mipmap generation for immutable textures with incomplete pyramids</li>
+</ul>
+
+<p>Nigel Stewart (1):</p>
+<ul>
+  <li>osmesa: Expose GL entry points for Windows build via DEF file.</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>gallivm: disable f16c when not using AVX</li>
+</ul>
+
+<p>Samuel Li (2):</p>
+<ul>
+  <li>radeonsi: add support for Stoney asics (v3)</li>
+  <li>radeonsi: add Stoney pci ids</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.6.html
+++ b/docs/relnotes/11.0.6.html
@@ -0,0 +1,144 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.6 Release Notes / November 21, 2015</h1>
+
+<p>
+Mesa 11.0.6 is a bug fix release which fixes bugs found since the 11.0.5 release.
+</p>
+<p>
+Mesa 11.0.6 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91780">Bug 91780</a> - Rendering issues with geometry shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92588">Bug 92588</a> - [HSW,BDW,BSW,SKL-Y][GLES 3.1 CTS] ES31-CTS.arrays_of_arrays.InteractionFunctionCalls2 - assert</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92738">Bug 92738</a> - Randon R7 240 doesn't work on 16KiB page size platform</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92860">Bug 92860</a> - [radeonsi][bisected] st/mesa: implement ARB_copy_image - Corruption in ARK Survival Evolved</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92900">Bug 92900</a> - [regression bisected] About 700 piglit regressions is what could go wrong</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: enable optimal raster config setting for fiji (v2)</li>
+</ul>
+
+<p>Ben Widawsky (1):</p>
+<ul>
+  <li>i965/skl/gt4: Fix URB programming restriction.</li>
+</ul>
+
+<p>Boyuan Zhang (2):</p>
+<ul>
+  <li>st/vaapi: fix vaapi VC-1 simple/main corruption v2</li>
+  <li>radeon/uvd: fix VC-1 simple/main profile decode v2</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>r600: initialised PGM_RESOURCES_2 for ES/GS</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.5</li>
+  <li>cherry-ignore: add the swrast front buffer support</li>
+  <li>automake: use static llvm for make distcheck</li>
+  <li>Update version to 11.0.6</li>
+</ul>
+
+<p>Eric Anholt (3):</p>
+<ul>
+  <li>vc4: Return GL_OUT_OF_MEMORY when buffer allocation fails.</li>
+  <li>vc4: Return NULL when we can't make our shadow for a sampler view.</li>
+  <li>vc4: Add support for nir_op_uge, using the carry bit on QPU_A_SUB.</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>meta/generate_mipmap: Don't leak the sampler object</li>
+  <li>meta/generate_mipmap: Only modify the draw framebuffer binding in fallback_required</li>
+</ul>
+
+<p>Ilia Mirkin (2):</p>
+<ul>
+  <li>mesa/copyimage: allow width/height to not be multiples of block</li>
+  <li>nouveau: don't expose HEVC decoding support</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>nir/vars_to_ssa: Rework copy set handling in lower_copies_to_load_store</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Allow implicit int -&gt; uint conversions for the % operator.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: initialize SX_PS_DOWNCONVERT to 0 on Stoney</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>winsys/radeon: Use CPU page size instead of hardcoding 4096 bytes v3</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: use simple coeffs calc for 128bit vectors</li>
+</ul>
+
+<p>Roland Scheidegger (2):</p>
+<ul>
+  <li>radeon: fix bgrx8/xrgb8 blits</li>
+  <li>r200: fix bgrx8/xrgb8 blits</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -124,6 +124,10 @@ CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
 CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
 CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
 CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
+CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -181,3 +181,5 @@ CHIPSET(0x9876, CARRIZO_, CARRIZO)
 CHIPSET(0x9877, CARRIZO_, CARRIZO)

 CHIPSET(0x7300, FIJI_, FIJI)
+
+CHIPSET(0x98E4, STONEY_, STONEY)
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -312,6 +312,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
         else
            conf->dri_single_config = dri_config;
      }
+
+      conf->base.SurfaceType = 0;
      conf->base.ConfigID = config_id;

      _eglLinkConfig(&conf->base);
@@ -2384,13 +2386,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
   unsigned wait_flags = 0;
   EGLint ret = EGL_CONDITION_SATISFIED_KHR;

-   if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
+   /* The EGL_KHR_fence_sync spec states:
+    *
+    *    "If no context is current for the bound API,
+    *     the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored.
+    */
+   if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
      wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;

   /* the sync object should take a reference while waiting */
   dri2_egl_ref_sync(dri2_sync);

-   if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
+   if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
                                         dri2_sync->fence, wait_flags,
                                         timeout))
      dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -152,12 +152,51 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,

         /* The EGL_KHR_create_context spec says:
          *
-          *     "Flags are only defined for OpenGL context creation, and
-          *     specifying a flags value other than zero for other types of
-          *     contexts, including OpenGL ES contexts, will generate an
-          *     error."
+          *     "If the EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR flag bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a <debug context> will be created.
+          *     [...]
+          *     In some cases a debug context may be identical to a non-debug
+          *     context. This bit is supported for OpenGL and OpenGL ES
+          *     contexts."
          */
-         if (api != EGL_OPENGL_API && val != 0) {
+         if ((val & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR) &&
+             (api != EGL_OPENGL_API && api != EGL_OPENGL_ES_API)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR flag bit
+          *     is set in EGL_CONTEXT_FLAGS_KHR, then a <forward-compatible>
+          *     context will be created. Forward-compatible contexts are
+          *     defined only for OpenGL versions 3.0 and later. They must not
+          *     support functionality marked as <deprecated> by that version of
+          *     the API, while a non-forward-compatible context must support
+          *     all functionality in that version, deprecated or not. This bit
+          *     is supported for OpenGL contexts, and requesting a
+          *     forward-compatible context for OpenGL versions less than 3.0
+          *     will generate an error."
+          */
+         if ((val & EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR) &&
+             (api != EGL_OPENGL_API || ctx->ClientMajorVersion < 3)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context_spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
+          *     access> will be created. Robust buffer access is defined in the
+          *     GL_ARB_robustness extension specification, and the resulting
+          *     context must also support either the GL_ARB_robustness
+          *     extension, or a version of OpenGL incorporating equivalent
+          *     functionality. This bit is supported for OpenGL contexts.
+          */
+         if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
+             (api != EGL_OPENGL_API ||
+              !dpy->Extensions.EXT_create_context_robustness)) {
            err = EGL_BAD_ATTRIBUTE;
            break;
         }
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -427,6 +427,7 @@ lp_build_init(void)
       */
      util_cpu_caps.has_avx = 0;
      util_cpu_caps.has_avx2 = 0;
+      util_cpu_caps.has_f16c = 0;
   }

 #ifdef PIPE_ARCH_PPC_64
@@ -458,7 +459,9 @@ lp_build_init(void)
   util_cpu_caps.has_sse3 = 0;
   util_cpu_caps.has_ssse3 = 0;
   util_cpu_caps.has_sse4_1 = 0;
+   util_cpu_caps.has_sse4_2 = 0;
   util_cpu_caps.has_avx = 0;
+   util_cpu_caps.has_avx2 = 0;
   util_cpu_caps.has_f16c = 0;
 #endif

--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
   case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   }
   /* if we get here, we missed a shader cap above (and should have seen
    * a compiler warning.)
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -81,6 +81,8 @@
 #  pragma pop_macro("DEBUG")
 #endif

+#include "c11/threads.h"
+#include "os/os_thread.h"
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
@@ -103,6 +105,33 @@ static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;

 }

+static once_flag init_native_targets_once_flag;
+
+static void init_native_targets()
+{
+   // If we have a native target, initialize it to ensure it is linked in and
+   // usable by the JIT.
+   llvm::InitializeNativeTarget();
+
+   llvm::InitializeNativeTargetAsmPrinter();
+
+   llvm::InitializeNativeTargetDisassembler();
+}
+
+/**
+ * The llvm target registry is not thread-safe, so drivers and state-trackers
+ * that want to initialize targets should use the gallivm_init_llvm_targets()
+ * function to safely initialize targets.
+ *
+ * LLVM targets should be initialized before the driver or state-tracker tries
+ * to access the registry.
+ */
+extern "C" void
+gallivm_init_llvm_targets(void)
+{
+   call_once(&init_native_targets_once_flag, init_native_targets);
+}
+
 extern "C" void
 lp_set_target_options(void)
 {
@@ -115,13 +144,7 @@ lp_set_target_options(void)
   llvm::DisablePrettyStackTrace = true;
 #endif

-   // If we have a native target, initialize it to ensure it is linked in and
-   // usable by the JIT.
-   llvm::InitializeNativeTarget();
-
-   llvm::InitializeNativeTargetAsmPrinter();
-
-   llvm::InitializeNativeTargetDisassembler();
+   gallivm_init_llvm_targets();
 }


@@ -474,20 +497,48 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
 #endif
   }

-   llvm::SmallVector<std::string, 1> MAttrs;
-   if (util_cpu_caps.has_avx) {
-      /*
-       * AVX feature is not automatically detected from CPUID by the X86 target
-       * yet, because the old (yet default) JIT engine is not capable of
-       * emitting the opcodes. On newer llvm versions it is and at least some
-       * versions (tested with 3.3) will emit avx opcodes without this anyway.
-       */
-      MAttrs.push_back("+avx");
-      if (util_cpu_caps.has_f16c) {
-         MAttrs.push_back("+f16c");
-      }
-      builder.setMAttrs(MAttrs);
-   }
+   llvm::SmallVector<std::string, 16> MAttrs;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   /*
+    * We need to unset attributes because sometimes LLVM mistakenly assumes
+    * certain features are present given the processor name.
+    *
+    * https://bugs.freedesktop.org/show_bug.cgi?id=92214
+    * http://llvm.org/PR25021
+    * http://llvm.org/PR19429
+    * http://llvm.org/PR16721
+    */
+   MAttrs.push_back(util_cpu_caps.has_sse    ? "+sse"    : "-sse"   );
+   MAttrs.push_back(util_cpu_caps.has_sse2   ? "+sse2"   : "-sse2"  );
+   MAttrs.push_back(util_cpu_caps.has_sse3   ? "+sse3"   : "-sse3"  );
+   MAttrs.push_back(util_cpu_caps.has_ssse3  ? "+ssse3"  : "-ssse3" );
+#if HAVE_LLVM >= 0x0304
+   MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
+#else
+   MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41"  : "-sse41" );
+#endif
+#if HAVE_LLVM >= 0x0304
+   MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
+#else
+   MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42"  : "-sse42" );
+#endif
+   /*
+    * AVX feature is not automatically detected from CPUID by the X86 target
+    * yet, because the old (yet default) JIT engine is not capable of
+    * emitting the opcodes. On newer llvm versions it is and at least some
+    * versions (tested with 3.3) will emit avx opcodes without this anyway.
+    */
+   MAttrs.push_back(util_cpu_caps.has_avx  ? "+avx"  : "-avx");
+   MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
+   MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
+#endif
+
+#if defined(PIPE_ARCH_PPC)
+   MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
+#endif
+
+   builder.setMAttrs(MAttrs);

 #if HAVE_LLVM >= 0x0305
   StringRef MCPU = llvm::sys::getHostCPUName();
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -41,6 +41,8 @@ extern "C" {

 struct lp_generated_code;

+extern void
+gallivm_init_llvm_targets(void);

 extern void
 lp_set_target_options(void);
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -463,6 +463,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
   case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   }
   /* if we get here, we missed a shader cap above (and should have seen
    * a compiler warning.)
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,

   u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      return;
   u_upload_unmap(ctx->upload);

   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      goto out;
+
   vb.stride = 0;

   blitter_set_running_flag(ctx);
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);

+out:
   blitter_restore_vertex_states(ctx);
   blitter_restore_render_cond(ctx);
   blitter_unset_running_flag(ctx);
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -545,6 +545,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
+         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -355,6 +355,10 @@ to be 0.
  are supported.
 * ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
  ignore tgsi_declaration_range::Last for shader inputs and outputs.
+* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: This is the maximum number
+  of iterations that loops are allowed to have to be unrolled. It is only
+  a hint to state trackers. Whether any loops will be unrolled is not
+  guaranteed.


 .. _pipe_compute_cap:
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -828,11 +828,7 @@ fd3_emit_restore(struct fd_context *ctx)
 	OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
 			A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));

-	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
-	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
-	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
-			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
-			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+	fd3_emit_cache_flush(ctx, ring);

 	OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 	OUT_RING(ring, 0x00000000);                  /* GRAS_CL_CLIP_CNTL */
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
@@ -90,4 +90,15 @@ void fd3_emit_restore(struct fd_context *ctx);

 void fd3_emit_init(struct pipe_context *pctx);

+static inline void
+fd3_emit_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+	fd_wfi(ctx, ring);
+	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+}
+
 #endif /* FD3_EMIT_H */
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -558,6 +558,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 	OUT_RING(ring, fui(x1));
 	OUT_RING(ring, fui(y1));

+	fd3_emit_cache_flush(ctx, ring);
+
 	for (i = 0; i < 4; i++) {
 		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
 		OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -407,6 +407,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
 		return 16;
 	case PIPE_SHADER_CAP_PREFERRED_IR:
 		return PIPE_SHADER_IR_TGSI;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		return 32;
 	}
 	debug_printf("unknown shader param %d\n", param);
 	return 0;
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      default:
         debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
         return 0;
--- a/src/gallium/drivers/ilo/ilo_screen.c
+++ b/src/gallium/drivers/ilo/ilo_screen.c
@@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
      return PIPE_SHADER_IR_TGSI;
   case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
      return 1;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;

   default:
      return 0;
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -746,7 +746,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,

   pos_init(bld, x0, y0);

-   if (coeff_type.length > 4) {
+   /*
+    * Simple method (single step interpolation) may be slower if vector length
+    * is just 4, but the results are different (generally less accurate) with
+    * the other method, so always use more accurate version.
+    */
+   if (1) {
      bld->simple_interp = TRUE;
      {
         /* XXX this should use a global static table */
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -25,10 +25,24 @@

 #include <stack>
 #include <limits>
+#if __cplusplus >= 201103L
+#include <unordered_map>
+#else
 #include <tr1/unordered_map>
+#endif

 namespace nv50_ir {

+#if __cplusplus >= 201103L
+using std::hash;
+using std::unordered_map;
+#elif !defined(ANDROID)
+using std::tr1::hash;
+using std::tr1::unordered_map;
+#else
+#error Android release before Lollipop is not supported!
+#endif
+
 #define MAX_REGISTER_FILE_SIZE 256

 class RegisterSet
@@ -349,12 +363,12 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)

 struct PhiMapHash {
   size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
-      return std::tr1::hash<Instruction*>()(val.first) * 31 +
-         std::tr1::hash<BasicBlock*>()(val.second);
+      return hash<Instruction*>()(val.first) * 31 +
+         hash<BasicBlock*>()(val.second);
   }
 };

-typedef std::tr1::unordered_map<
+typedef unordered_map<
   std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;

 // Critical edges need to be split up so that work can be inserted along
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -80,7 +80,12 @@ release_allocation(struct nouveau_mm_allocation **mm,
 inline void
 nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
 {
-   nouveau_bo_ref(NULL, &buf->bo);
+   if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
+      nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
+      buf->bo = NULL;
+   } else {
+      nouveau_bo_ref(NULL, &buf->bo);
+   }

   if (buf->mm)
      release_allocation(&buf->mm, buf->fence);
@@ -281,7 +286,8 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
 {
   if (tx->map) {
      if (likely(tx->bo)) {
-         nouveau_bo_ref(NULL, &tx->bo);
+         nouveau_fence_work(nv->screen->fence.current,
+                            nouveau_fence_unref_bo, tx->bo);
         if (tx->mm)
            release_allocation(&tx->mm, nv->screen->fence.current);
      } else {
@@ -782,7 +788,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
      nv->copy_data(nv, buf->bo, buf->offset, new_domain,
                    bo, offset, old_domain, buf->base.width0);

-      nouveau_bo_ref(NULL, &bo);
+      nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
      if (mm)
         release_allocation(&mm, screen->fence.current);
   } else
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -190,8 +190,14 @@ nouveau_fence_wait(struct nouveau_fence *fence)
   /* wtf, someone is waiting on a fence in flush_notify handler? */
   assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);

-   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
-      nouveau_fence_emit(fence);
+   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
+      PUSH_SPACE(screen->pushbuf, 8);
+      /* The space allocation might trigger a flush, which could emit the
+       * current fence. So check again.
+       */
+      if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
+         nouveau_fence_emit(fence);
+   }

   if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
      if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
@@ -224,10 +230,22 @@ nouveau_fence_wait(struct nouveau_fence *fence)
 void
 nouveau_fence_next(struct nouveau_screen *screen)
 {
-   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
-      nouveau_fence_emit(screen->fence.current);
+   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) {
+      if (screen->fence.current->ref > 1)
+         nouveau_fence_emit(screen->fence.current);
+      else
+         return;
+   }

   nouveau_fence_ref(NULL, &screen->fence.current);

   nouveau_fence_new(screen, &screen->fence.current, false);
 }
+
+void
+nouveau_fence_unref_bo(void *data)
+{
+   struct nouveau_bo *bo = data;
+
+   nouveau_bo_ref(NULL, &bo);
+}
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -37,6 +37,9 @@ void nouveau_fence_next(struct nouveau_screen *);
 bool nouveau_fence_wait(struct nouveau_fence *);
 bool nouveau_fence_signalled(struct nouveau_fence *);

+void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
+
+
 static inline void
 nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
 {
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
@@ -437,6 +437,7 @@ nouveau_vp3_screen_get_video_param(struct pipe_screen *pscreen,
      /* VP3 does not support MPEG4, VP4+ do. */
      return entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM &&
         profile >= PIPE_VIDEO_PROFILE_MPEG1 &&
+         profile < PIPE_VIDEO_PROFILE_HEVC_MAIN &&
         (!vp3 || codec != PIPE_VIDEO_FORMAT_MPEG4) &&
         firmware_present(pscreen, profile);
   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push)
 static inline bool
 PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
 {
+   /* Provide a buffer so that fences always have room to be emitted */
+   size += 8;
   if (PUSH_AVAIL(push) < size)
      return nouveau_pushbuf_space(push, size, 0, 0) == 0;
   return true;
--- a/src/gallium/drivers/nouveau/nv30/nv30_format.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_format.c
@@ -78,12 +78,12 @@ nv30_format_info_table[PIPE_FORMAT_COUNT] = {
   _(B4G4R4X4_UNORM      , S___),
   _(B4G4R4A4_UNORM      , S___),
   _(B5G6R5_UNORM        , SB__),
-   _(B8G8R8X8_UNORM      , SB__),
-   _(B8G8R8X8_SRGB       , S___),
-   _(B8G8R8A8_UNORM      , SB__),
-   _(B8G8R8A8_SRGB       , S___),
+   _(BGRX8888_UNORM      , SB__),
+   _(BGRX8888_SRGB       , S___),
+   _(BGRA8888_UNORM      , SB__),
+   _(BGRA8888_SRGB       , S___),
   _(R8G8B8A8_UNORM      , __V_),
-   _(R8G8B8A8_SNORM      , S___),
+   _(RGBA8888_SNORM      , S___),
   _(DXT1_RGB            , S___),
   _(DXT1_SRGB           , S___),
   _(DXT1_RGBA           , S___),
@@ -138,8 +138,8 @@ const struct nv30_format
 nv30_format_table[PIPE_FORMAT_COUNT] = {
   R_(B5G5R5X1_UNORM    , X1R5G5B5          ),
   R_(B5G6R5_UNORM      , R5G6B5            ),
-   R_(B8G8R8X8_UNORM    , X8R8G8B8          ),
-   R_(B8G8R8A8_UNORM    , A8R8G8B8          ),
+   R_(BGRX8888_UNORM    , X8R8G8B8          ),
+   R_(BGRA8888_UNORM    , A8R8G8B8          ),
   Z_(Z16_UNORM         , Z16               ),
   Z_(X8Z24_UNORM       , Z24S8             ),
   Z_(S8_UINT_Z24_UNORM , Z24S8             ),
@@ -223,11 +223,11 @@ nv30_texfmt_table[PIPE_FORMAT_COUNT] = {
   _(B4G4R4X4_UNORM    , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
   _(B4G4R4A4_UNORM    , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
   _(B5G6R5_UNORM      , R5G6B5  , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
-   _(B8G8R8X8_UNORM    , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
-   _(B8G8R8X8_SRGB     , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
-   _(B8G8R8A8_UNORM    , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
-   _(B8G8R8A8_SRGB     , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
-   _(R8G8B8A8_SNORM    , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
+   _(BGRX8888_UNORM    , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
+   _(BGRX8888_SRGB     , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
+   _(BGRA8888_UNORM    , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
+   _(BGRA8888_SRGB     , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
+   _(RGBA8888_SNORM    , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
   _(DXT1_RGB          , DXT1    , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
   _(DXT1_SRGB         , DXT1    , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
   _(DXT1_RGBA         , DXT1    , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -339,10 +339,15 @@ nv30_miptree_transfer_unmap(struct pipe_context *pipe,
   struct nv30_context *nv30 = nv30_context(pipe);
   struct nv30_transfer *tx = nv30_transfer(ptx);

-   if (ptx->usage & PIPE_TRANSFER_WRITE)
+   if (ptx->usage & PIPE_TRANSFER_WRITE) {
      nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);

-   nouveau_bo_ref(NULL, &tx->tmp.bo);
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv30->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->tmp.bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->tmp.bo);
+   }
   pipe_resource_reference(&ptx->resource, NULL);
   FREE(tx);
 }
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -261,6 +261,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      default:
         debug_printf("unknown vertex shader param %d\n", param);
         return 0;
@@ -302,6 +304,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      default:
         debug_printf("unknown fragment shader param %d\n", param);
         return 0;
@@ -345,7 +349,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)

   *sequence = ++screen->base.fence.sequence;

-   BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 3);
+   PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
+              (2 /* size */ << 18) | (7 /* subchan */ << 13));
   PUSH_DATA (push, 0);
   PUSH_DATA (push, *sequence);
 }
--- a/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_vbo.c
@@ -191,7 +191,11 @@ nv30_vbo_validate(struct nv30_context *nv30)
   if (!nv30->vertex || nv30->draw_flags)
      return;

+#ifdef PIPE_ARCH_BIG_ENDIAN
+   if (1) { /* Figure out where the buffers are getting messed up */
+#else
   if (unlikely(vertex->need_conversion)) {
+#endif
      nv30->vbo_fifo = ~0;
      nv30->vbo_user = 0;
   } else {
--- a/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
+++ b/src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c
@@ -1,3 +1,4 @@
+#include <strings.h>
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -163,7 +163,10 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
 {
   struct nv50_miptree *mt = nv50_miptree(pt);

-   nouveau_bo_ref(NULL, &mt->base.bo);
+   if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
+      nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
+   else
+      nouveau_bo_ref(NULL, &mt->base.bo);

   nouveau_fence_ref(NULL, &mt->base.fence);
   nouveau_fence_ref(NULL, &mt->base.fence_wr);
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -297,6 +297,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
   case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
   case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
      return 0;
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   default:
      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
      return 0;
@@ -386,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
   PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
               PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
            }
            while (words) {
-               unsigned nr;
-
-               if (!PUSH_SPACE(push, 16))
-                  break;
-               nr = PUSH_AVAIL(push);
-               assert(nr >= 16);
-               nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
+               unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

+               PUSH_SPACE(push, nr + 3);
               BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
               PUSH_DATA (push, (start << 8) | b);
               BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
   PUSH_DATA (push, 0);

   while (count) {
-      unsigned nr;
-
-      if (!PUSH_SPACE(push, 16))
-         break;
-      nr = PUSH_AVAIL(push);
-      assert(nr >= 16);
-      nr = MIN2(count, nr - 1);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+      unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);

      BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
      PUSH_DATAp(push, src, nr);
@@ -365,9 +358,14 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
            tx->rect[0].base += mt->layer_stride;
         tx->rect[1].base += tx->nblocksy * tx->base.stride;
      }
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nv50->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
   }

-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
   pipe_resource_reference(&transfer->resource, NULL);

   FREE(tx);
@@ -390,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
   nouveau_pushbuf_validate(push);

   while (words) {
-      unsigned nr;
-
-      nr = PUSH_AVAIL(push);
-      nr = MIN2(nr - 7, words);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+      unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);

+      PUSH_SPACE(push, nr + 7);
      BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
      PUSH_DATAh(push, bo->offset + base);
      PUSH_DATA (push, bo->offset + base);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -26,7 +26,8 @@ nvc0_resource_from_handle(struct pipe_screen * screen,
   } else {
      struct pipe_resource *res = nv50_miptree_from_handle(screen,
                                                           templ, whandle);
-      nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
+      if (res)
+         nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
      return res;
   }
 }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -310,6 +310,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
      return 16; /* would be 32 in linked (OpenGL-style) mode */
   case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
      return 16; /* XXX not sure if more are really safe */
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+      return 32;
   default:
      NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
      return 0;
@@ -535,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
   /* we need to do it after possible flush in MARK_RING */
   *sequence = ++screen->base.fence.sequence;

-   BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+   assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);
+   PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
   PUSH_DATAh(push, screen->fence.bo->offset);
   PUSH_DATA (push, screen->fence.bo->offset);
   PUSH_DATA (push, *sequence);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
   nouveau_pushbuf_validate(push);

   while (count) {
-      unsigned nr;
+      unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);

-      if (!PUSH_SPACE(push, 16))
+      if (!PUSH_SPACE(push, nr + 9))
         break;
-      nr = PUSH_AVAIL(push);
-      assert(nr >= 16);
-      nr = MIN2(count, nr - 9);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);

      BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
      PUSH_DATAh(push, dst->offset + offset);
@@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
   nouveau_pushbuf_validate(push);

   while (count) {
-      unsigned nr;
+      unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1));

-      if (!PUSH_SPACE(push, 16))
+      if (!PUSH_SPACE(push, nr + 10))
         break;
-      nr = PUSH_AVAIL(push);
-      assert(nr >= 16);
-      nr = MIN2(count, nr - 8);
-      nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));

      BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
      PUSH_DATAh(push, dst->offset + offset);
@@ -495,11 +487,16 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
         tx->rect[1].base += tx->nblocksy * tx->base.stride;
      }
      NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+
+      /* Allow the copies above to finish executing before freeing the source */
+      nouveau_fence_work(nvc0->screen->base.fence.current,
+                         nouveau_fence_unref_bo, tx->rect[1].bo);
+   } else {
+      nouveau_bo_ref(NULL, &tx->rect[1].bo);
   }
   if (tx->base.usage & PIPE_TRANSFER_READ)
      NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);

-   nouveau_bo_ref(NULL, &tx->rect[1].bo);
   pipe_resource_reference(&transfer->resource, NULL);

   FREE(tx);
@@ -566,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv,
   PUSH_DATA (push, bo->offset + base);

   while (words) {
-      unsigned nr = PUSH_AVAIL(push);
-      nr = MIN2(nr, words);
-      nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+      unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1);

      PUSH_SPACE(push, nr + 2);
      PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -27,6 +27,7 @@ struct push_context {
   struct {
      bool enabled;
      bool value;
+      uint8_t width;
      unsigned stride;
      const uint8_t *data;
   } edgeflag;
@@ -53,6 +54,7 @@ nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
   /* silence warnings */
   ctx->edgeflag.data = NULL;
   ctx->edgeflag.stride = 0;
+   ctx->edgeflag.width = 0;
 }

 static inline void
@@ -100,6 +102,7 @@ nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
   struct nv04_resource *buf = nv04_resource(vb->buffer);

   ctx->edgeflag.stride = vb->stride;
+   ctx->edgeflag.width = util_format_get_blocksize(ve->src_format);
   if (buf) {
      unsigned offset = vb->buffer_offset + ve->src_offset;
      ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
@@ -137,10 +140,17 @@ prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
 }

 static inline bool
-ef_value(const struct push_context *ctx, uint32_t index)
+ef_value_8(const struct push_context *ctx, uint32_t index)
 {
-   float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
-   return *pf ? true : false;
+   uint8_t *pf = (uint8_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+   return !!*pf;
+}
+
+static inline bool
+ef_value_32(const struct push_context *ctx, uint32_t index)
+{
+   uint32_t *pf = (uint32_t *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+   return !!*pf;
 }

 static inline bool
@@ -154,7 +164,11 @@ static inline unsigned
 ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
 {
   unsigned i;
-   for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+   bool ef = ctx->edgeflag.value;
+   if (ctx->edgeflag.width == 1)
+      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+   else
+      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
 }

@@ -162,7 +176,11 @@ static inline unsigned
 ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
 {
   unsigned i;
-   for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+   bool ef = ctx->edgeflag.value;
+   if (ctx->edgeflag.width == 1)
+      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+   else
+      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
 }

@@ -170,7 +188,11 @@ static inline unsigned
 ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
 {
   unsigned i;
-   for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+   bool ef = ctx->edgeflag.value;
+   if (ctx->edgeflag.width == 1)
+      for (i = 0; i < n && ef_value_8(ctx, elts[i]) == ef; ++i);
+   else
+      for (i = 0; i < n && ef_value_32(ctx, elts[i]) == ef; ++i);
   return i;
 }

@@ -178,7 +200,11 @@ static inline unsigned
 ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
 {
   unsigned i;
-   for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i);
+   bool ef = ctx->edgeflag.value;
+   if (ctx->edgeflag.width == 1)
+      for (i = 0; i < n && ef_value_8(ctx, start++) == ef; ++i);
+   else
+      for (i = 0; i < n && ef_value_32(ctx, start++) == ef; ++i);
   return i;
 }

--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -300,6 +300,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
            return 0;
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+            return 32;
        case PIPE_SHADER_CAP_PREFERRED_IR:
            return PIPE_SHADER_IR_TGSI;
        }
@@ -356,6 +358,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
        case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
        case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
            return 0;
+        case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+            return 32;
        case PIPE_SHADER_CAP_PREFERRED_IR:
            return PIPE_SHADER_IR_TGSI;
        }
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2342,6 +2342,8 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)

 	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);

 	/* to avoid GPU doing any preloading of constant from random address */
@@ -2781,6 +2783,8 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)

 	r600_store_context_reg(cb, R_028848_SQ_PGM_RESOURCES_2_PS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_028864_SQ_PGM_RESOURCES_2_VS, S_028864_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_2_GS, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
+	r600_store_context_reg(cb, R_028894_SQ_PGM_RESOURCES_2_ES, S_028848_SINGLE_ROUND(V_SQ_ROUND_NEAREST_EVEN));
 	r600_store_context_reg(cb, R_0288A8_SQ_PGM_RESOURCES_FS, 0);

 	/* to avoid GPU doing any preloading of constant from random address */
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -1497,6 +1497,7 @@
 #define   S_028878_UNCACHED_FIRST_INST(x)              (((x) & 0x1) << 28)
 #define   G_028878_UNCACHED_FIRST_INST(x)              (((x) >> 28) & 0x1)
 #define   C_028878_UNCACHED_FIRST_INST                 0xEFFFFFFF
+#define R_02887C_SQ_PGM_RESOURCES_2_GS                 0x02887C

 #define R_028890_SQ_PGM_RESOURCES_ES                 0x028890
 #define   S_028890_NUM_GPRS(x)                         (((x) & 0xFF) << 0)
@@ -1511,6 +1512,7 @@
 #define   S_028890_UNCACHED_FIRST_INST(x)              (((x) & 0x1) << 28)
 #define   G_028890_UNCACHED_FIRST_INST(x)              (((x) >> 28) & 0x1)
 #define   C_028890_UNCACHED_FIRST_INST                 0xEFFFFFFF
+#define R_028894_SQ_PGM_RESOURCES_2_ES                 0x028894

 #define R_028864_SQ_PGM_RESOURCES_2_VS               0x028864
 #define   S_028864_SINGLE_ROUND(x)                     (((x) & 0x3) << 0)
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -621,7 +621,7 @@ static int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
 	return 0;
 }

-void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg)
+void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *neg, unsigned abs)
 {
 	switch(value) {
 	case 0:
@@ -641,11 +641,11 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel, unsigned *ne
 		break;
 	case 0xBF800000: /* -1.0f */
 		*sel = V_SQ_ALU_SRC_1;
-		*neg ^= 1;
+		*neg ^= !abs;
 		break;
 	case 0xBF000000: /* -0.5f */
 		*sel = V_SQ_ALU_SRC_0_5;
-		*neg ^= 1;
+		*neg ^= !abs;
 		break;
 	default:
 		*sel = V_SQ_ALU_SRC_LITERAL;
@@ -1194,7 +1194,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 			r600_bytecode_special_constants(nalu->src[i].value,
-				&nalu->src[i].sel, &nalu->src[i].neg);
+				&nalu->src[i].sel, &nalu->src[i].neg, nalu->src[i].abs);
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
 		bc->ngpr = nalu->dst.sel + 1;
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -254,7 +254,7 @@ int r600_bytecode_add_cfinst(struct r600_bytecode *bc,
 int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
 		const struct r600_bytecode_alu *alu, unsigned type);
 void r600_bytecode_special_constants(uint32_t value,
-		unsigned *sel, unsigned *neg);
+		unsigned *sel, unsigned *neg, unsigned abs);
 void r600_bytecode_disasm(struct r600_bytecode *bc);
 void r600_bytecode_alu_read(struct r600_bytecode *bc,
 		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -504,6 +504,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 	case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 		return 0;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		/* due to a bug in the shader compiler, some loops hang
+		 * if they are not unrolled, see:
+		 *    https://bugs.freedesktop.org/show_bug.cgi?id=86720
+		 */
+		return 255;
 	}
 	return 0;
 }
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1004,7 +1004,7 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {

 			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
-			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
 			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 				return;
 		}
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -305,12 +305,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				data += box->x % R600_MAP_BUFFER_ALIGNMENT;
 				return r600_buffer_get_transfer(ctx, resource, level, usage, box,
 								ptransfer, data, staging, offset);
-			} else {
-				return NULL; /* error, shouldn't occur though */
 			}
+		} else {
+			/* At this point, the buffer is always idle (we checked it above). */
+			usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 		}
-		/* At this point, the buffer is always idle (we checked it above). */
-		usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
 	}
 	/* Using a staging buffer in GTT for larger reads is much faster. */
 	else if ((usage & PIPE_TRANSFER_READ) &&
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -78,6 +78,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 	 * I guess the 4th one is derived from the first 3.
 	 * The vertex specification should match u_blitter's vertex element state. */
 	u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
+	if (!buf)
+		return;
+
 	vb[0] = x1;
 	vb[1] = y1;
 	vb[2] = depth;
@@ -412,6 +415,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
 	case CHIP_ICELAND: return "AMD ICELAND";
 	case CHIP_CARRIZO: return "AMD CARRIZO";
 	case CHIP_FIJI: return "AMD FIJI";
+	case CHIP_STONEY: return "AMD STONEY";
 	default: return "AMD unknown";
 	}
 }
@@ -540,6 +544,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_ICELAND: return "iceland";
 	case CHIP_CARRIZO: return "carrizo";
 	case CHIP_FIJI: return "fiji";
+#if HAVE_LLVM <= 0x0307
+	case CHIP_STONEY: return "carrizo";
+#else
+	case CHIP_STONEY: return "stoney";
+#endif
 	default: return "";
 	}
 }
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -989,6 +989,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,

 			if (usage & PIPE_TRANSFER_READ) {
 				struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
+				if (!temp) {
+					R600_ERR("failed to create a temporary depth texture\n");
+					FREE(trans);
+					return NULL;
+				}

 				r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
 				rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -25,6 +25,8 @@
 */
 #include "radeon_llvm_emit.h"
 #include "radeon_elf_util.h"
+#include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
 #include "util/u_memory.h"
 #include "pipe/p_shader_tokens.h"

@@ -86,30 +88,29 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)

 static void init_r600_target()
 {
-	static unsigned initialized = 0;
-	if (!initialized) {
+	gallivm_init_llvm_targets();
 #if HAVE_LLVM < 0x0307
-		LLVMInitializeR600TargetInfo();
-		LLVMInitializeR600Target();
-		LLVMInitializeR600TargetMC();
-		LLVMInitializeR600AsmPrinter();
+	LLVMInitializeR600TargetInfo();
+	LLVMInitializeR600Target();
+	LLVMInitializeR600TargetMC();
+	LLVMInitializeR600AsmPrinter();
 #else
-		LLVMInitializeAMDGPUTargetInfo();
-		LLVMInitializeAMDGPUTarget();
-		LLVMInitializeAMDGPUTargetMC();
-		LLVMInitializeAMDGPUAsmPrinter();
+	LLVMInitializeAMDGPUTargetInfo();
+	LLVMInitializeAMDGPUTarget();
+	LLVMInitializeAMDGPUTargetMC();
+	LLVMInitializeAMDGPUAsmPrinter();

 #endif
-		initialized = 1;
-	}
 }

+static once_flag init_r600_target_once_flag = ONCE_FLAG_INIT;
+
 LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
 {
 	LLVMTargetRef target = NULL;
 	char *err_message = NULL;

-	init_r600_target();
+	call_once(&init_r600_target_once_flag, init_r600_target);

 	if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
 		fprintf(stderr, "Cannot find target for triple %s ", triple);
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -940,6 +940,12 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
 	dec->msg->body.decode.width_in_samples = dec->base.width;
 	dec->msg->body.decode.height_in_samples = dec->base.height;

+	if ((picture->profile == PIPE_VIDEO_PROFILE_VC1_SIMPLE) ||
+	    (picture->profile == PIPE_VIDEO_PROFILE_VC1_MAIN)) {
+		dec->msg->body.decode.width_in_samples = align(dec->msg->body.decode.width_in_samples, 16) / 16;
+		dec->msg->body.decode.height_in_samples = align(dec->msg->body.decode.height_in_samples, 16) / 16;
+	}
+
 	dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
 	dec->msg->body.decode.bsd_size = bs_size;
 	dec->msg->body.decode.db_pitch = dec->base.width;
--- a/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
+++ b/src/gallium/drivers/radeon/radeon_vce_40_2_2.c
@@ -233,6 +233,9 @@ static void vui(struct rvce_encoder *enc)
 {
 	int i;

+	if (!enc->pic.rate_ctrl.frame_rate_num)
+		return;
+
 	RVCE_BEGIN(0x04000009); // vui
 	RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
 	RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -205,11 +205,12 @@ int rvid_get_video_param(struct pipe_screen *screen,
 			 enum pipe_video_cap param)
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
+	enum pipe_video_format codec = u_reduce_video_profile(profile);

 	if (entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
 		switch (param) {
 		case PIPE_VIDEO_CAP_SUPPORTED:
-			return u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
+			return codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
 				rvce_is_fw_version_supported(rscreen);
 	        case PIPE_VIDEO_CAP_NPOT_TEXTURES:
        	        return 1;
@@ -232,38 +233,18 @@ int rvid_get_video_param(struct pipe_screen *screen,
 		}
 	}

-	/* UVD 2.x limits */
-	if (rscreen->family < CHIP_PALM) {
-		enum pipe_video_format codec = u_reduce_video_profile(profile);
-		switch (param) {
-		case PIPE_VIDEO_CAP_SUPPORTED:
-			/* no support for MPEG4 */
-			return codec != PIPE_VIDEO_FORMAT_MPEG4 &&
-			       /* FIXME: VC-1 simple/main profile is broken */
-			       profile != PIPE_VIDEO_PROFILE_VC1_SIMPLE &&
-			       profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
-		case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-		case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-			/* MPEG2 only with shaders and no support for
-			   interlacing on R6xx style UVD */
-			return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
-			       rscreen->family > CHIP_RV770;
-		default:
-			break;
-		}
-	}
-
 	switch (param) {
 	case PIPE_VIDEO_CAP_SUPPORTED:
-		switch (u_reduce_video_profile(profile)) {
+		switch (codec) {
 		case PIPE_VIDEO_FORMAT_MPEG12:
 		case PIPE_VIDEO_FORMAT_MPEG4:
 		case PIPE_VIDEO_FORMAT_MPEG4_AVC:
-			return entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+			if (rscreen->family < CHIP_PALM)
+				/* no support for MPEG4 */
+				return codec != PIPE_VIDEO_FORMAT_MPEG4;
+			return true;
 		case PIPE_VIDEO_FORMAT_VC1:
-			/* FIXME: VC-1 simple/main profile is broken */
-			return profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED &&
-			       entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE;
+			return true;
 		case PIPE_VIDEO_FORMAT_HEVC:
 			/* Carrizo only supports HEVC Main */
 			return rscreen->family >= CHIP_CARRIZO &&
@@ -280,13 +261,17 @@ int rvid_get_video_param(struct pipe_screen *screen,
 	case PIPE_VIDEO_CAP_PREFERED_FORMAT:
 		return PIPE_FORMAT_NV12;
 	case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-		if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
-			return false; //The hardware doesn't support interlaced HEVC.
-		return true;
 	case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-		if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
-			return false; //The hardware doesn't support interlaced HEVC.
-		return true;
+		if (rscreen->family < CHIP_PALM) {
+			/* MPEG2 only with shaders and no support for
+			   interlacing on R6xx style UVD */
+			return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
+			       rscreen->family > CHIP_RV770;
+		} else {
+			if (u_reduce_video_profile(profile) == PIPE_VIDEO_FORMAT_HEVC)
+				return false; //The firmware doesn't support interlaced HEVC.
+			return true;
+		}
 	case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
 		return true;
 	case PIPE_VIDEO_CAP_MAX_LEVEL:
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -137,6 +137,7 @@ enum radeon_family {
    CHIP_ICELAND,
    CHIP_CARRIZO,
    CHIP_FIJI,
+    CHIP_STONEY,
    CHIP_LAST,
 };

--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -468,7 +468,8 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf

 	u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
 		       (struct pipe_resource**)rbuffer, &tmp);
-	util_memcpy_cpu_to_le32(tmp, ptr, size);
+	if (rbuffer)
+		util_memcpy_cpu_to_le32(tmp, ptr, size);
 }

 static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
@@ -500,6 +501,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
 			si_upload_const_buffer(sctx,
 					       (struct r600_resource**)&buffer, input->user_buffer,
 					       input->buffer_size, &buffer_offset);
+			if (!buffer) {
+				/* Just unbind on failure. */
+				si_set_constant_buffer(ctx, shader, slot, NULL);
+				return;
+			}
 			va = r600_resource(buffer)->gpu_address + buffer_offset;
 		} else {
 			pipe_resource_reference(&buffer, input->buffer);
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -170,6 +170,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 	if (sctx->b.chip_class == CIK) {
 		sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
 								 PIPE_USAGE_DEFAULT, 16);
+		if (!sctx->null_const_buf.buffer)
+			goto fail;
 		sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;

 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
@@ -487,6 +489,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
 	case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
 	case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
 		return 1;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		return 32;
 	}
 	return 0;
 }
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3829,11 +3829,14 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
 	const struct radeon_shader_binary *binary = &shader->binary;
 	unsigned i;
+	int r;
 	bool dump  = r600_can_dump_shader(&sscreen->b,
 		shader->selector ? shader->selector->tokens : NULL);

 	si_shader_binary_read_config(sscreen, shader, 0);
-	si_shader_binary_upload(sscreen, shader);
+	r = si_shader_binary_upload(sscreen, shader);
+	if (r)
+		return r;

 	if (dump) {
 		if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
@@ -4198,8 +4201,10 @@ out:

 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
 {
-	if (shader->gs_copy_shader)
+	if (shader->gs_copy_shader) {
 		si_shader_destroy(ctx, shader->gs_copy_shader);
+		FREE(shader->gs_copy_shader);
+	}

 	if (shader->scratch_bo)
 		r600_resource_reference(&shader->scratch_bo, NULL);
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3176,6 +3176,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,

 static void si_init_config(struct si_context *sctx)
 {
+	struct si_screen *sscreen = sctx->screen;
 	unsigned num_rb = MIN2(sctx->screen->b.info.r600_num_backends, 16);
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
@@ -3243,9 +3244,14 @@ static void si_init_config(struct si_context *sctx)
 		raster_config_1 = 0x0000002e;
 		break;
 	case CHIP_FIJI:
-		/* Fiji should be same as Hawaii, but that causes corruption in some cases */
-		raster_config = 0x16000012; /* 0x3a00161a */
-		raster_config_1 = 0x0000002a; /* 0x0000002e */
+		if (sscreen->b.info.cik_macrotile_mode_array[0] == 0x000000e8) {
+			/* old kernels with old tiling config */
+			raster_config = 0x16000012;
+			raster_config_1 = 0x0000002a;
+		} else {
+			raster_config = 0x3a00161a;
+			raster_config_1 = 0x0000002e;
+		}
 		break;
 	case CHIP_TONGA:
 		raster_config = 0x16000012;
@@ -3266,6 +3272,7 @@ static void si_init_config(struct si_context *sctx)
 		break;
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
+	case CHIP_STONEY:
 		raster_config = 0x00000000;
 		raster_config_1 = 0x00000000;
 		break;
@@ -3341,5 +3348,8 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
 	}

+	if (sctx->b.family == CHIP_STONEY)
+		si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
+
 	sctx->init_config = pm4;
 }
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -274,7 +274,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      unsigned force_level);

 /* si_state_shader.c */
-void si_update_shaders(struct si_context *sctx);
+bool si_update_shaders(struct si_context *sctx);
 void si_init_shader_functions(struct si_context *sctx);

 /* si_state_draw.c */
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -760,8 +760,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	else
 		sctx->current_rast_prim = info->mode;

-	si_update_shaders(sctx);
-	if (!si_upload_shader_descriptors(sctx))
+	if (!si_update_shaders(sctx) ||
+	    !si_upload_shader_descriptors(sctx))
 		return;

 	if (info->indexed) {
@@ -783,6 +783,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)

 			u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
 				       &out_offset, &out_buffer, &ptr);
+			if (!out_buffer) {
+				pipe_resource_reference(&ib.buffer, NULL);
+				return;
+			}

 			util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0,
 							   ib.offset + start_offset,
@@ -803,6 +807,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 			u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
 				      (char*)ib.user_buffer + start_offset,
 				      &ib.offset, &ib.buffer);
+			if (!ib.buffer)
+				return;
 			/* info->start will be added by the drawing code */
 			ib.offset -= start_offset;
 		}
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -665,8 +665,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 	struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
 	int i;

+	if (!sel)
+		return NULL;
+
 	sel->type = pipe_shader_type;
 	sel->tokens = tgsi_dup_tokens(state->tokens);
+	if (!sel->tokens) {
+		FREE(sel);
+		return NULL;
+	}
+
 	sel->so = state->stream_output;
 	tgsi_scan_shader(state->tokens, &sel->info);
 	p_atomic_inc(&sscreen->b.num_shaders_created);
@@ -725,7 +733,12 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 	}

 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
-		si_shader_select(ctx, sel);
+		if (si_shader_select(ctx, sel)) {
+			fprintf(stderr, "radeonsi: can't create a shader\n");
+			tgsi_free_tokens(sel->tokens);
+			FREE(sel);
+			return NULL;
+		}

 	return sel;
 }
@@ -1031,11 +1044,23 @@ static void si_init_gs_rings(struct si_context *sctx)
 	assert(!sctx->gs_rings);
 	sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);

+	if (!sctx->gs_rings)
+		return;
+
 	sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 				       PIPE_USAGE_DEFAULT, esgs_ring_size);
+	if (!sctx->esgs_ring) {
+		FREE(sctx->gs_rings);
+		return;
+	}

 	sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					     PIPE_USAGE_DEFAULT, gsvs_ring_size);
+	if (!sctx->gsvs_ring) {
+		pipe_resource_reference(&sctx->esgs_ring, NULL);
+		FREE(sctx->gs_rings);
+		return;
+	}

 	if (sctx->b.chip_class >= CIK) {
 		if (sctx->b.chip_class >= VI) {
@@ -1094,14 +1119,16 @@ static void si_update_gs_rings(struct si_context *sctx)

 }
 /**
- * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
- *          otherwise.
+ * @returns 1 if \p sel has been updated to use a new scratch buffer
+ *          0 if not
+ *          < 0 if there was a failure
 */
-static unsigned si_update_scratch_buffer(struct si_context *sctx,
+static int si_update_scratch_buffer(struct si_context *sctx,
 				    struct si_shader_selector *sel)
 {
 	struct si_shader *shader;
 	uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
+	int r;

 	if (!sel)
 		return 0;
@@ -1122,7 +1149,9 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
 	si_shader_apply_scratch_relocs(sctx, shader, scratch_va);

 	/* Replace the shader bo with a new bo that has the relocs applied. */
-	si_shader_binary_upload(sctx->screen, shader);
+	r = si_shader_binary_upload(sctx->screen, shader);
+	if (r)
+		return r;

 	/* Update the shader state to use the new shader bo. */
 	si_shader_init_pm4_state(shader);
@@ -1161,7 +1190,7 @@ static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
 	return bytes;
 }

-static void si_update_spi_tmpring_size(struct si_context *sctx)
+static bool si_update_spi_tmpring_size(struct si_context *sctx)
 {
 	unsigned current_scratch_buffer_size =
 		si_get_current_scratch_buffer_size(sctx);
@@ -1169,6 +1198,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 		si_get_max_scratch_bytes_per_wave(sctx);
 	unsigned scratch_needed_size = scratch_bytes_per_wave *
 		sctx->scratch_waves;
+	int r;

 	if (scratch_needed_size > 0) {

@@ -1181,6 +1211,9 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 			sctx->scratch_buffer =
 					si_resource_create_custom(&sctx->screen->b.b,
 	                                PIPE_USAGE_DEFAULT, scratch_needed_size);
+			if (!sctx->scratch_buffer)
+				return false;
+			sctx->emit_scratch_reloc = true;
 		}

 		/* Update the shaders, so they are using the latest scratch.  The
@@ -1188,31 +1221,57 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
 		 * last used, so we still need to try to update them, even if
 		 * they require scratch buffers smaller than the current size.
 		 */
-		if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+		r = si_update_scratch_buffer(sctx, sctx->ps_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
-		if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+
+		r = si_update_scratch_buffer(sctx, sctx->gs_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
-		if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
+
+		r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
+		if (r < 0)
+			return false;
+		if (r == 1)
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);

 		/* VS can be bound as LS, ES, or VS. */
 		if (sctx->tes_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
 		} else if (sctx->gs_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 		} else {
-			if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+			r = si_update_scratch_buffer(sctx, sctx->vs_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		}

 		/* TES can be bound as ES or VS. */
 		if (sctx->gs_shader) {
-			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+			r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
 		} else {
-			if (si_update_scratch_buffer(sctx, sctx->tes_shader))
+			r = si_update_scratch_buffer(sctx, sctx->tes_shader);
+			if (r < 0)
+				return false;
+			if (r == 1)
 				si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
 		}
 	}
@@ -1223,6 +1282,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)

 	sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
 				S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+	return true;
 }

 static void si_init_tess_factor_ring(struct si_context *sctx)
@@ -1230,11 +1290,20 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
 	assert(!sctx->tf_state);
 	sctx->tf_state = CALLOC_STRUCT(si_pm4_state);

+	if (!sctx->tf_state)
+		return;
+
 	sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
 					   PIPE_USAGE_DEFAULT,
 					   32768 * sctx->screen->b.info.max_se);
+	if (!sctx->tf_ring) {
+		FREE(sctx->tf_state);
+		return;
+	}
+
 	sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
 			     sctx->tf_ring->width0, fui(0), false);
+
 	assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);

 	if (sctx->b.chip_class >= CIK) {
@@ -1290,7 +1359,6 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)

 	sctx->fixed_func_tcs_shader =
 		ureg_create_shader_and_destroy(ureg, &sctx->b.b);
-	assert(sctx->fixed_func_tcs_shader);
 }

 static void si_update_vgt_shader_config(struct si_context *sctx)
@@ -1338,32 +1406,49 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha
 	sctx->b.streamout.stride_in_dw = shader->so.stride;
 }

-void si_update_shaders(struct si_context *sctx)
+bool si_update_shaders(struct si_context *sctx)
 {
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
+	int r;

 	/* Update stages before GS. */
 	if (sctx->tes_shader) {
-		if (!sctx->tf_state)
+		if (!sctx->tf_state) {
 			si_init_tess_factor_ring(sctx);
+			if (!sctx->tf_state)
+				return false;
+		}

 		/* VS as LS */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);

 		if (sctx->tcs_shader) {
-			si_shader_select(ctx, sctx->tcs_shader);
+			r = si_shader_select(ctx, sctx->tcs_shader);
+			if (r)
+				return false;
 			si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
 		} else {
-			if (!sctx->fixed_func_tcs_shader)
+			if (!sctx->fixed_func_tcs_shader) {
 				si_generate_fixed_func_tcs(sctx);
-			si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+				if (!sctx->fixed_func_tcs_shader)
+					return false;
+			}
+
+			r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
+			if (r)
+				return false;
 			si_pm4_bind_state(sctx, hs,
 					  sctx->fixed_func_tcs_shader->current->pm4);
 		}

-		si_shader_select(ctx, sctx->tes_shader);
+		r = si_shader_select(ctx, sctx->tes_shader);
+		if (r)
+			return false;
+
 		if (sctx->gs_shader) {
 			/* TES as ES */
 			si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
@@ -1374,24 +1459,33 @@ void si_update_shaders(struct si_context *sctx)
 		}
 	} else if (sctx->gs_shader) {
 		/* VS as ES */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
 	} else {
 		/* VS as VS */
-		si_shader_select(ctx, sctx->vs_shader);
+		r = si_shader_select(ctx, sctx->vs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
 		si_update_so(sctx, sctx->vs_shader);
 	}

 	/* Update GS. */
 	if (sctx->gs_shader) {
-		si_shader_select(ctx, sctx->gs_shader);
+		r = si_shader_select(ctx, sctx->gs_shader);
+		if (r)
+			return false;
 		si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
 		si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
 		si_update_so(sctx, sctx->gs_shader);

-		if (!sctx->gs_rings)
+		if (!sctx->gs_rings) {
 			si_init_gs_rings(sctx);
+			if (!sctx->gs_rings)
+				return false;
+		}

 		if (sctx->emitted.named.gs_rings != sctx->gs_rings)
 			sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
@@ -1406,18 +1500,9 @@ void si_update_shaders(struct si_context *sctx)

 	si_update_vgt_shader_config(sctx);

-	si_shader_select(ctx, sctx->ps_shader);
-
-	if (!sctx->ps_shader->current) {
-		struct si_shader_selector *sel;
-
-		/* use a dummy shader if compiling the shader (variant) failed */
-		si_make_dummy_ps(sctx);
-		sel = sctx->dummy_pixel_shader;
-		si_shader_select(ctx, sel);
-		sctx->ps_shader->current = sel->current;
-	}
-
+	r = si_shader_select(ctx, sctx->ps_shader);
+	if (r)
+		return false;
 	si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);

 	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
@@ -1428,9 +1513,14 @@ void si_update_shaders(struct si_context *sctx)
 		si_update_spi_map(sctx);
 	}

-	if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
-	    si_pm4_state_changed(sctx, gs)) {
-		si_update_spi_tmpring_size(sctx);
+	if (si_pm4_state_changed(sctx, ls) ||
+	    si_pm4_state_changed(sctx, hs) ||
+	    si_pm4_state_changed(sctx, es) ||
+	    si_pm4_state_changed(sctx, gs) ||
+	    si_pm4_state_changed(sctx, vs) ||
+	    si_pm4_state_changed(sctx, ps)) {
+		if (!si_update_spi_tmpring_size(sctx))
+			return false;
 	}

 	if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
@@ -1445,6 +1535,7 @@ void si_update_shaders(struct si_context *sctx)
 		if (sctx->b.chip_class == SI)
 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
+	return true;
 }

 void si_init_shader_functions(struct si_context *sctx)
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -6732,6 +6732,9 @@
 #define   S_00B854_WAVES_PER_SH(x)                                    (((x) & 0x3F) << 0) /* mask is 0x3FF on CIK */
 #define   G_00B854_WAVES_PER_SH(x)                                    (((x) >> 0) & 0x3F) /* mask is 0x3FF on CIK */
 #define   C_00B854_WAVES_PER_SH                                       0xFFFFFFC0 /* mask is 0x3FF on CIK */
+#define   S_00B854_WAVES_PER_SH_CIK(x)                                (((x) & 0x3FF) << 0)
+#define   G_00B854_WAVES_PER_SH_CIK(x)                                (((x) >> 0) & 0x3FF)
+#define   C_00B854_WAVES_PER_SH_CIK                                   0xFFFFFC00
 #define   S_00B854_TG_PER_CU(x)                                       (((x) & 0x0F) << 12)
 #define   G_00B854_TG_PER_CU(x)                                       (((x) >> 12) & 0x0F)
 #define   C_00B854_TG_PER_CU                                          0xFFFF0FFF
@@ -8335,6 +8338,296 @@
 #define     V_028714_SPI_SHADER_UINT16_ABGR                         0x07
 #define     V_028714_SPI_SHADER_SINT16_ABGR                         0x08
 #define     V_028714_SPI_SHADER_32_ABGR                             0x09
+/* Stoney */
+#define R_028754_SX_PS_DOWNCONVERT                                      0x028754
+#define   S_028754_MRT0(x)                                            (((x) & 0x0F) << 0)
+#define   G_028754_MRT0(x)                                            (((x) >> 0) & 0x0F)
+#define   C_028754_MRT0                                               0xFFFFFFF0
+#define     V_028754_SX_RT_EXPORT_NO_CONVERSION				0
+#define     V_028754_SX_RT_EXPORT_32_R					1
+#define     V_028754_SX_RT_EXPORT_32_A					2
+#define     V_028754_SX_RT_EXPORT_10_11_11				3
+#define     V_028754_SX_RT_EXPORT_2_10_10_10				4
+#define     V_028754_SX_RT_EXPORT_8_8_8_8				5
+#define     V_028754_SX_RT_EXPORT_5_6_5					6
+#define     V_028754_SX_RT_EXPORT_1_5_5_5				7
+#define     V_028754_SX_RT_EXPORT_4_4_4_4				8
+#define     V_028754_SX_RT_EXPORT_16_16_GR				9
+#define     V_028754_SX_RT_EXPORT_16_16_AR				10
+#define   S_028754_MRT1(x)                                            (((x) & 0x0F) << 4)
+#define   G_028754_MRT1(x)                                            (((x) >> 4) & 0x0F)
+#define   C_028754_MRT1                                               0xFFFFFF0F
+#define   S_028754_MRT2(x)                                            (((x) & 0x0F) << 8)
+#define   G_028754_MRT2(x)                                            (((x) >> 8) & 0x0F)
+#define   C_028754_MRT2                                               0xFFFFF0FF
+#define   S_028754_MRT3(x)                                            (((x) & 0x0F) << 12)
+#define   G_028754_MRT3(x)                                            (((x) >> 12) & 0x0F)
+#define   C_028754_MRT3                                               0xFFFF0FFF
+#define   S_028754_MRT4(x)                                            (((x) & 0x0F) << 16)
+#define   G_028754_MRT4(x)                                            (((x) >> 16) & 0x0F)
+#define   C_028754_MRT4                                               0xFFF0FFFF
+#define   S_028754_MRT5(x)                                            (((x) & 0x0F) << 20)
+#define   G_028754_MRT5(x)                                            (((x) >> 20) & 0x0F)
+#define   C_028754_MRT5                                               0xFF0FFFFF
+#define   S_028754_MRT6(x)                                            (((x) & 0x0F) << 24)
+#define   G_028754_MRT6(x)                                            (((x) >> 24) & 0x0F)
+#define   C_028754_MRT6                                               0xF0FFFFFF
+#define   S_028754_MRT7(x)                                            (((x) & 0x0F) << 28)
+#define   G_028754_MRT7(x)                                            (((x) >> 28) & 0x0F)
+#define   C_028754_MRT7                                               0x0FFFFFFF
+#define R_028758_SX_BLEND_OPT_EPSILON                                   0x028758
+#define   S_028758_MRT0_EPSILON(x)                                    (((x) & 0x0F) << 0)
+#define   G_028758_MRT0_EPSILON(x)                                    (((x) >> 0) & 0x0F)
+#define   C_028758_MRT0_EPSILON                                       0xFFFFFFF0
+#define      V_028758_EXACT						0
+#define      V_028758_11BIT_FORMAT					1
+#define      V_028758_10BIT_FORMAT					3
+#define      V_028758_8BIT_FORMAT					7
+#define      V_028758_6BIT_FORMAT					11
+#define      V_028758_5BIT_FORMAT					13
+#define      V_028758_4BIT_FORMAT					15
+#define   S_028758_MRT1_EPSILON(x)                                    (((x) & 0x0F) << 4)
+#define   G_028758_MRT1_EPSILON(x)                                    (((x) >> 4) & 0x0F)
+#define   C_028758_MRT1_EPSILON                                       0xFFFFFF0F
+#define   S_028758_MRT2_EPSILON(x)                                    (((x) & 0x0F) << 8)
+#define   G_028758_MRT2_EPSILON(x)                                    (((x) >> 8) & 0x0F)
+#define   C_028758_MRT2_EPSILON                                       0xFFFFF0FF
+#define   S_028758_MRT3_EPSILON(x)                                    (((x) & 0x0F) << 12)
+#define   G_028758_MRT3_EPSILON(x)                                    (((x) >> 12) & 0x0F)
+#define   C_028758_MRT3_EPSILON                                       0xFFFF0FFF
+#define   S_028758_MRT4_EPSILON(x)                                    (((x) & 0x0F) << 16)
+#define   G_028758_MRT4_EPSILON(x)                                    (((x) >> 16) & 0x0F)
+#define   C_028758_MRT4_EPSILON                                       0xFFF0FFFF
+#define   S_028758_MRT5_EPSILON(x)                                    (((x) & 0x0F) << 20)
+#define   G_028758_MRT5_EPSILON(x)                                    (((x) >> 20) & 0x0F)
+#define   C_028758_MRT5_EPSILON                                       0xFF0FFFFF
+#define   S_028758_MRT6_EPSILON(x)                                    (((x) & 0x0F) << 24)
+#define   G_028758_MRT6_EPSILON(x)                                    (((x) >> 24) & 0x0F)
+#define   C_028758_MRT6_EPSILON                                       0xF0FFFFFF
+#define   S_028758_MRT7_EPSILON(x)                                    (((x) & 0x0F) << 28)
+#define   G_028758_MRT7_EPSILON(x)                                    (((x) >> 28) & 0x0F)
+#define   C_028758_MRT7_EPSILON                                       0x0FFFFFFF
+#define R_02875C_SX_BLEND_OPT_CONTROL                                   0x02875C
+#define   S_02875C_MRT0_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 0)
+#define   G_02875C_MRT0_COLOR_OPT_DISABLE(x)                          (((x) >> 0) & 0x1)
+#define   C_02875C_MRT0_COLOR_OPT_DISABLE                             0xFFFFFFFE
+#define   S_02875C_MRT0_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 1)
+#define   G_02875C_MRT0_ALPHA_OPT_DISABLE(x)                          (((x) >> 1) & 0x1)
+#define   C_02875C_MRT0_ALPHA_OPT_DISABLE                             0xFFFFFFFD
+#define   S_02875C_MRT1_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 4)
+#define   G_02875C_MRT1_COLOR_OPT_DISABLE(x)                          (((x) >> 4) & 0x1)
+#define   C_02875C_MRT1_COLOR_OPT_DISABLE                             0xFFFFFFEF
+#define   S_02875C_MRT1_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 5)
+#define   G_02875C_MRT1_ALPHA_OPT_DISABLE(x)                          (((x) >> 5) & 0x1)
+#define   C_02875C_MRT1_ALPHA_OPT_DISABLE                             0xFFFFFFDF
+#define   S_02875C_MRT2_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 8)
+#define   G_02875C_MRT2_COLOR_OPT_DISABLE(x)                          (((x) >> 8) & 0x1)
+#define   C_02875C_MRT2_COLOR_OPT_DISABLE                             0xFFFFFEFF
+#define   S_02875C_MRT2_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 9)
+#define   G_02875C_MRT2_ALPHA_OPT_DISABLE(x)                          (((x) >> 9) & 0x1)
+#define   C_02875C_MRT2_ALPHA_OPT_DISABLE                             0xFFFFFDFF
+#define   S_02875C_MRT3_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 12)
+#define   G_02875C_MRT3_COLOR_OPT_DISABLE(x)                          (((x) >> 12) & 0x1)
+#define   C_02875C_MRT3_COLOR_OPT_DISABLE                             0xFFFFEFFF
+#define   S_02875C_MRT3_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 13)
+#define   G_02875C_MRT3_ALPHA_OPT_DISABLE(x)                          (((x) >> 13) & 0x1)
+#define   C_02875C_MRT3_ALPHA_OPT_DISABLE                             0xFFFFDFFF
+#define   S_02875C_MRT4_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 16)
+#define   G_02875C_MRT4_COLOR_OPT_DISABLE(x)                          (((x) >> 16) & 0x1)
+#define   C_02875C_MRT4_COLOR_OPT_DISABLE                             0xFFFEFFFF
+#define   S_02875C_MRT4_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 17)
+#define   G_02875C_MRT4_ALPHA_OPT_DISABLE(x)                          (((x) >> 17) & 0x1)
+#define   C_02875C_MRT4_ALPHA_OPT_DISABLE                             0xFFFDFFFF
+#define   S_02875C_MRT5_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 20)
+#define   G_02875C_MRT5_COLOR_OPT_DISABLE(x)                          (((x) >> 20) & 0x1)
+#define   C_02875C_MRT5_COLOR_OPT_DISABLE                             0xFFEFFFFF
+#define   S_02875C_MRT5_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 21)
+#define   G_02875C_MRT5_ALPHA_OPT_DISABLE(x)                          (((x) >> 21) & 0x1)
+#define   C_02875C_MRT5_ALPHA_OPT_DISABLE                             0xFFDFFFFF
+#define   S_02875C_MRT6_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 24)
+#define   G_02875C_MRT6_COLOR_OPT_DISABLE(x)                          (((x) >> 24) & 0x1)
+#define   C_02875C_MRT6_COLOR_OPT_DISABLE                             0xFEFFFFFF
+#define   S_02875C_MRT6_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 25)
+#define   G_02875C_MRT6_ALPHA_OPT_DISABLE(x)                          (((x) >> 25) & 0x1)
+#define   C_02875C_MRT6_ALPHA_OPT_DISABLE                             0xFDFFFFFF
+#define   S_02875C_MRT7_COLOR_OPT_DISABLE(x)                          (((x) & 0x1) << 28)
+#define   G_02875C_MRT7_COLOR_OPT_DISABLE(x)                          (((x) >> 28) & 0x1)
+#define   C_02875C_MRT7_COLOR_OPT_DISABLE                             0xEFFFFFFF
+#define   S_02875C_MRT7_ALPHA_OPT_DISABLE(x)                          (((x) & 0x1) << 29)
+#define   G_02875C_MRT7_ALPHA_OPT_DISABLE(x)                          (((x) >> 29) & 0x1)
+#define   C_02875C_MRT7_ALPHA_OPT_DISABLE                             0xDFFFFFFF
+#define   S_02875C_PIXEN_ZERO_OPT_DISABLE(x)                          (((x) & 0x1) << 31)
+#define   G_02875C_PIXEN_ZERO_OPT_DISABLE(x)                          (((x) >> 31) & 0x1)
+#define   C_02875C_PIXEN_ZERO_OPT_DISABLE                             0x7FFFFFFF
+#define R_028760_SX_MRT0_BLEND_OPT                                      0x028760
+#define   S_028760_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028760_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028760_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define     V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL			0
+#define     V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE			1
+#define     V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0			2
+#define     V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1			3
+#define     V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0			4
+#define     V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1			5
+#define     V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0			6
+#define     V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE		7
+#define   S_028760_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028760_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028760_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028760_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028760_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028760_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define     V_028760_OPT_COMB_NONE					0
+#define     V_028760_OPT_COMB_ADD					1
+#define     V_028760_OPT_COMB_SUBTRACT					2
+#define     V_028760_OPT_COMB_MIN					3
+#define     V_028760_OPT_COMB_MAX					4
+#define     V_028760_OPT_COMB_REVSUBTRACT				5
+#define     V_028760_OPT_COMB_BLEND_DISABLED				6
+#define     V_028760_OPT_COMB_SAFE_ADD					7
+#define   S_028760_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028760_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028760_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028760_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028760_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028760_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028760_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028760_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028760_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028764_SX_MRT1_BLEND_OPT                                      0x028764
+#define   S_028764_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028764_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028764_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028764_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028764_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028764_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028764_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028764_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028764_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028764_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028764_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028764_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028764_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028764_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028764_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028764_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028764_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028764_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028768_SX_MRT2_BLEND_OPT                                      0x028768
+#define   S_028768_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028768_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028768_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028768_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028768_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028768_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028768_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028768_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028768_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028768_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028768_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028768_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028768_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028768_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028768_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028768_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028768_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028768_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_02876C_SX_MRT3_BLEND_OPT                                      0x02876C
+#define   S_02876C_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_02876C_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_02876C_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_02876C_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_02876C_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_02876C_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_02876C_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_02876C_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_02876C_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_02876C_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_02876C_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_02876C_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_02876C_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_02876C_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_02876C_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_02876C_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_02876C_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_02876C_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028770_SX_MRT4_BLEND_OPT                                      0x028770
+#define   S_028770_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028770_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028770_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028770_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028770_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028770_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028770_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028770_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028770_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028770_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028770_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028770_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028770_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028770_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028770_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028770_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028770_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028770_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028774_SX_MRT5_BLEND_OPT                                      0x028774
+#define   S_028774_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028774_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028774_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028774_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028774_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028774_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028774_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028774_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028774_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028774_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028774_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028774_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028774_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028774_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028774_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028774_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028774_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028774_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_028778_SX_MRT6_BLEND_OPT                                      0x028778
+#define   S_028778_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_028778_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_028778_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_028778_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_028778_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_028778_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_028778_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_028778_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_028778_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_028778_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_028778_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_028778_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_028778_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_028778_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_028778_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_028778_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_028778_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_028778_ALPHA_COMB_FCN                                     0xF8FFFFFF
+#define R_02877C_SX_MRT7_BLEND_OPT                                      0x02877C
+#define   S_02877C_COLOR_SRC_OPT(x)                                   (((x) & 0x07) << 0)
+#define   G_02877C_COLOR_SRC_OPT(x)                                   (((x) >> 0) & 0x07)
+#define   C_02877C_COLOR_SRC_OPT                                      0xFFFFFFF8
+#define   S_02877C_COLOR_DST_OPT(x)                                   (((x) & 0x07) << 4)
+#define   G_02877C_COLOR_DST_OPT(x)                                   (((x) >> 4) & 0x07)
+#define   C_02877C_COLOR_DST_OPT                                      0xFFFFFF8F
+#define   S_02877C_COLOR_COMB_FCN(x)                                  (((x) & 0x07) << 8)
+#define   G_02877C_COLOR_COMB_FCN(x)                                  (((x) >> 8) & 0x07)
+#define   C_02877C_COLOR_COMB_FCN                                     0xFFFFF8FF
+#define   S_02877C_ALPHA_SRC_OPT(x)                                   (((x) & 0x07) << 16)
+#define   G_02877C_ALPHA_SRC_OPT(x)                                   (((x) >> 16) & 0x07)
+#define   C_02877C_ALPHA_SRC_OPT                                      0xFFF8FFFF
+#define   S_02877C_ALPHA_DST_OPT(x)                                   (((x) & 0x07) << 20)
+#define   G_02877C_ALPHA_DST_OPT(x)                                   (((x) >> 20) & 0x07)
+#define   C_02877C_ALPHA_DST_OPT                                      0xFF8FFFFF
+#define   S_02877C_ALPHA_COMB_FCN(x)                                  (((x) & 0x07) << 24)
+#define   G_02877C_ALPHA_COMB_FCN(x)                                  (((x) >> 24) & 0x07)
+#define   C_02877C_ALPHA_COMB_FCN                                     0xF8FFFFFF
+/*        */
 #define R_028780_CB_BLEND0_CONTROL                                      0x028780
 #define   S_028780_COLOR_SRCBLEND(x)                                  (((x) & 0x1F) << 0)
 #define   G_028780_COLOR_SRCBLEND(x)                                  (((x) >> 0) & 0x1F)
@@ -8597,6 +8890,7 @@
 #define     V_028808_CB_ELIMINATE_FAST_CLEAR                        0x02
 #define     V_028808_CB_RESOLVE                                     0x03
 #define     V_028808_CB_FMASK_DECOMPRESS                            0x05
+#define     V_028808_CB_DCC_DECOMPRESS                              0x06
 #define   S_028808_ROP3(x)                                            (((x) & 0xFF) << 16)
 #define   G_028808_ROP3(x)                                            (((x) >> 16) & 0xFF)
 #define   C_028808_ROP3                                               0xFF00FFFF
@@ -8675,6 +8969,11 @@
 #define     V_02880C_EXPORT_GREATER_THAN_Z                          2
 #define     V_02880C_EXPORT_RESERVED                                3
 /*     */
+/* Stoney */
+#define   S_02880C_DUAL_QUAD_DISABLE(x)                               (((x) & 0x1) << 15)
+#define   G_02880C_DUAL_QUAD_DISABLE(x)                               (((x) >> 15) & 0x1)
+#define   C_02880C_DUAL_QUAD_DISABLE                                  0xFFFF7FFF
+/*        */
 #define R_028810_PA_CL_CLIP_CNTL                                        0x028810
 #define   S_028810_UCP_ENA_0(x)                                       (((x) & 0x1) << 0)
 #define   G_028810_UCP_ENA_0(x)                                       (((x) >> 0) & 0x1)
@@ -9256,6 +9555,9 @@
 #define     V_028A40_GS_SCENARIO_G                                  0x03
 #define     V_028A40_GS_SCENARIO_C                                  0x04
 #define     V_028A40_SPRITE_EN                                      0x05
+#define   S_028A40_RESERVED_0(x)                                      (((x) & 0x1) << 3)
+#define   G_028A40_RESERVED_0(x)                                      (((x) >> 3) & 0x1)
+#define   C_028A40_RESERVED_0                                         0xFFFFFFF7
 #define   S_028A40_CUT_MODE(x)                                        (((x) & 0x03) << 4)
 #define   G_028A40_CUT_MODE(x)                                        (((x) >> 4) & 0x03)
 #define   C_028A40_CUT_MODE                                           0xFFFFFFCF
@@ -9263,12 +9565,19 @@
 #define     V_028A40_GS_CUT_512                                     0x01
 #define     V_028A40_GS_CUT_256                                     0x02
 #define     V_028A40_GS_CUT_128                                     0x03
+#define   S_028A40_RESERVED_1(x)                                      (((x) & 0x1F) << 6)
+#define   G_028A40_RESERVED_1(x)                                      (((x) >> 6) & 0x1F)
+#define   C_028A40_RESERVED_1                                         0xFFFFF83F
 #define   S_028A40_GS_C_PACK_EN(x)                                    (((x) & 0x1) << 11)
 #define   G_028A40_GS_C_PACK_EN(x)                                    (((x) >> 11) & 0x1)
 #define   C_028A40_GS_C_PACK_EN                                       0xFFFFF7FF
+#define   S_028A40_RESERVED_2(x)                                      (((x) & 0x1) << 12)
+#define   G_028A40_RESERVED_2(x)                                      (((x) >> 12) & 0x1)
+#define   C_028A40_RESERVED_2                                         0xFFFFEFFF
 #define   S_028A40_ES_PASSTHRU(x)                                     (((x) & 0x1) << 13)
 #define   G_028A40_ES_PASSTHRU(x)                                     (((x) >> 13) & 0x1)
 #define   C_028A40_ES_PASSTHRU                                        0xFFFFDFFF
+/* SI-CIK */
 #define   S_028A40_COMPUTE_MODE(x)                                    (((x) & 0x1) << 14)
 #define   G_028A40_COMPUTE_MODE(x)                                    (((x) >> 14) & 0x1)
 #define   C_028A40_COMPUTE_MODE                                       0xFFFFBFFF
@@ -9278,6 +9587,7 @@
 #define   S_028A40_ELEMENT_INFO_EN(x)                                 (((x) & 0x1) << 16)
 #define   G_028A40_ELEMENT_INFO_EN(x)                                 (((x) >> 16) & 0x1)
 #define   C_028A40_ELEMENT_INFO_EN                                    0xFFFEFFFF
+/*        */
 #define   S_028A40_PARTIAL_THD_AT_EOI(x)                              (((x) & 0x1) << 17)
 #define   G_028A40_PARTIAL_THD_AT_EOI(x)                              (((x) >> 17) & 0x1)
 #define   C_028A40_PARTIAL_THD_AT_EOI                                 0xFFFDFFFF
@@ -9463,6 +9773,9 @@
 #define   C_028A7C_RDREQ_POLICY                                       0xFFFFFF3F
 #define     V_028A7C_VGT_POLICY_LRU                                 0x00
 #define     V_028A7C_VGT_POLICY_STREAM                              0x01
+#define   S_028A7C_RDREQ_POLICY_VI(x)                                 (((x) & 0x1) << 6)
+#define   G_028A7C_RDREQ_POLICY_VI(x)                                 (((x) >> 6) & 0x1)
+#define   C_028A7C_RDREQ_POLICY_VI                                    0xFFFFFFBF
 #define   S_028A7C_ATC(x)                                             (((x) & 0x1) << 8)
 #define   G_028A7C_ATC(x)                                             (((x) >> 8) & 0x1)
 #define   C_028A7C_ATC                                                0xFFFFFEFF
@@ -9839,6 +10152,9 @@
 #define     V_028B6C_VGT_POLICY_BYPASS                              0x02
 /*     */
 /* VI */
+#define   S_028B6C_RDREQ_POLICY_VI(x)                                 (((x) & 0x1) << 15)
+#define   G_028B6C_RDREQ_POLICY_VI(x)                                 (((x) >> 15) & 0x1)
+#define   C_028B6C_RDREQ_POLICY_VI                                    0xFFFF7FFF
 #define   S_028B6C_DISTRIBUTION_MODE(x)                               (((x) & 0x03) << 17)
 #define   G_028B6C_DISTRIBUTION_MODE(x)                               (((x) >> 17) & 0x03)
 #define   C_028B6C_DISTRIBUTION_MODE                                  0xFFF9FFFF
@@ -10441,6 +10757,12 @@
 #define   S_028C3C_AA_MASK_X1Y1(x)                                    (((x) & 0xFFFF) << 16)
 #define   G_028C3C_AA_MASK_X1Y1(x)                                    (((x) >> 16) & 0xFFFF)
 #define   C_028C3C_AA_MASK_X1Y1                                       0x0000FFFF
+/* Stoney */
+#define R_028C40_PA_SC_SHADER_CONTROL                                   0x028C40
+#define   S_028C40_REALIGN_DQUADS_AFTER_N_WAVES(x)                    (((x) & 0x03) << 0)
+#define   G_028C40_REALIGN_DQUADS_AFTER_N_WAVES(x)                    (((x) >> 0) & 0x03)
+#define   C_028C40_REALIGN_DQUADS_AFTER_N_WAVES                       0xFFFFFFFC
+/*        */
 #define R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL                            0x028C58
 #define   S_028C58_VTX_REUSE_DEPTH(x)                                 (((x) & 0xFF) << 0)
 #define   G_028C58_VTX_REUSE_DEPTH(x)                                 (((x) >> 0) & 0xFF)
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -383,6 +383,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      }
      /* If we get here, we failed to handle a cap above */
      debug_printf("Unexpected fragment shader query %u\n", param);
@@ -441,6 +443,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
         return 0;
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return 32;
      }
      /* If we get here, we failed to handle a cap above */
      debug_printf("Unexpected vertex shader query %u\n", param);
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -36,6 +36,9 @@

 static bool dump_stats = false;

+static void
+vc4_bo_cache_free_all(struct vc4_bo_cache *cache);
+
 static void
 vc4_bo_dump_stats(struct vc4_screen *screen)
 {
@@ -136,6 +139,8 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
        bo->name = name;
        bo->private = true;

+        bool cleared_and_retried = false;
+retry:
        if (!using_vc4_simulator) {
                struct drm_vc4_create_bo create;
                memset(&create, 0, sizeof(create));
@@ -157,8 +162,15 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
                assert(create.size >= size);
        }
        if (ret != 0) {
-                fprintf(stderr, "create ioctl failure\n");
-                abort();
+                if (!list_empty(&screen->bo_cache.time_list) &&
+                    !cleared_and_retried) {
+                        cleared_and_retried = true;
+                        vc4_bo_cache_free_all(&screen->bo_cache);
+                        goto retry;
+                }
+
+                free(bo);
+                return NULL;
        }

        screen->bo_count++;
@@ -248,6 +260,18 @@ free_stale_bos(struct vc4_screen *screen, time_t time)
        }
 }

+static void
+vc4_bo_cache_free_all(struct vc4_bo_cache *cache)
+{
+        pipe_mutex_lock(cache->lock);
+        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
+                                 time_list) {
+                vc4_bo_remove_from_cache(cache, bo);
+                vc4_bo_free(bo);
+        }
+        pipe_mutex_unlock(cache->lock);
+}
+
 void
 vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
 {
@@ -600,11 +624,7 @@ vc4_bufmgr_destroy(struct pipe_screen *pscreen)
        struct vc4_screen *screen = vc4_screen(pscreen);
        struct vc4_bo_cache *cache = &screen->bo_cache;

-        list_for_each_entry_safe(struct vc4_bo, bo, &cache->time_list,
-                                 time_list) {
-                vc4_bo_remove_from_cache(cache, bo);
-                vc4_bo_free(bo);
-        }
+        vc4_bo_cache_free_all(cache);

        if (dump_stats) {
                fprintf(stderr, "BO stats after screen destroy:\n");
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -143,6 +143,8 @@ qir_opt_algebraic(struct vc4_compile *c)
                case QOP_SEL_X_Y_ZC:
                case QOP_SEL_X_Y_NS:
                case QOP_SEL_X_Y_NC:
+                case QOP_SEL_X_Y_CS:
+                case QOP_SEL_X_Y_CC:
                        if (is_zero(c, inst->src[1])) {
                                /* Replace references to a 0 uniform value
                                 * with the SEL_X_0 equivalent.
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1055,6 +1055,10 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
                qir_SF(c, qir_SUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
                break;
+        case nir_op_uge:
+                qir_SF(c, qir_SUB(c, src[0], src[1]));
+                *dest = qir_SEL_X_0_CC(c, qir_uniform_ui(c, ~0));
+                break;
        case nir_op_ilt:
                qir_SF(c, qir_SUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -62,10 +62,14 @@ static const struct qir_op_info qir_op_info[] = {
        [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1, false, true },
        [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1, false, true },
        [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1, false, true },
+        [QOP_SEL_X_0_CS] = { "fsel_x_0_cs", 1, 1, false, true },
+        [QOP_SEL_X_0_CC] = { "fsel_x_0_cc", 1, 1, false, true },
        [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2, false, true },
        [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2, false, true },
        [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2, false, true },
        [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2, false, true },
+        [QOP_SEL_X_Y_CS] = { "fsel_x_y_cs", 1, 2, false, true },
+        [QOP_SEL_X_Y_CC] = { "fsel_x_y_cc", 1, 2, false, true },

        [QOP_RCP] = { "rcp", 1, 1, false, true },
        [QOP_RSQ] = { "rsq", 1, 1, false, true },
@@ -193,10 +197,14 @@ qir_depends_on_flags(struct qinst *inst)
        case QOP_SEL_X_0_NC:
        case QOP_SEL_X_0_ZS:
        case QOP_SEL_X_0_ZC:
+        case QOP_SEL_X_0_CS:
+        case QOP_SEL_X_0_CC:
        case QOP_SEL_X_Y_NS:
        case QOP_SEL_X_Y_NC:
        case QOP_SEL_X_Y_ZS:
        case QOP_SEL_X_Y_ZC:
+        case QOP_SEL_X_Y_CS:
+        case QOP_SEL_X_Y_CC:
                return true;
        default:
                return false;
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -91,11 +91,15 @@ enum qop {
        QOP_SEL_X_0_ZC,
        QOP_SEL_X_0_NS,
        QOP_SEL_X_0_NC,
+        QOP_SEL_X_0_CS,
+        QOP_SEL_X_0_CC,
        /* Selects the src[0] if the ns flag bit is set, otherwise src[1]. */
        QOP_SEL_X_Y_ZS,
        QOP_SEL_X_Y_ZC,
        QOP_SEL_X_Y_NS,
        QOP_SEL_X_Y_NC,
+        QOP_SEL_X_Y_CS,
+        QOP_SEL_X_Y_CC,

        QOP_FTOI,
        QOP_ITOF,
@@ -570,10 +574,14 @@ QIR_ALU1(SEL_X_0_ZS)
 QIR_ALU1(SEL_X_0_ZC)
 QIR_ALU1(SEL_X_0_NS)
 QIR_ALU1(SEL_X_0_NC)
+QIR_ALU1(SEL_X_0_CS)
+QIR_ALU1(SEL_X_0_CC)
 QIR_ALU2(SEL_X_Y_ZS)
 QIR_ALU2(SEL_X_Y_ZC)
 QIR_ALU2(SEL_X_Y_NS)
 QIR_ALU2(SEL_X_Y_NC)
+QIR_ALU2(SEL_X_Y_CS)
+QIR_ALU2(SEL_X_Y_CC)
 QIR_ALU2(FMIN)
 QIR_ALU2(FMAX)
 QIR_ALU2(FMINABS)
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -271,6 +271,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                case QOP_SEL_X_0_ZC:
                case QOP_SEL_X_0_NS:
                case QOP_SEL_X_0_NC:
+                case QOP_SEL_X_0_CS:
+                case QOP_SEL_X_0_CC:
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_0_ZS +
                                          QPU_COND_ZS);
@@ -284,6 +286,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                case QOP_SEL_X_Y_ZC:
                case QOP_SEL_X_Y_NS:
                case QOP_SEL_X_Y_NC:
+                case QOP_SEL_X_Y_CS:
+                case QOP_SEL_X_Y_CC:
                        queue(c, qpu_a_MOV(dst, src[0]));
                        set_last_cond_add(c, qinst->op - QOP_SEL_X_Y_ZS +
                                          QPU_COND_ZS);
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -35,11 +35,12 @@

 static bool miptree_debug = false;

-static void
+static bool
 vc4_resource_bo_alloc(struct vc4_resource *rsc)
 {
        struct pipe_resource *prsc = &rsc->base.b;
        struct pipe_screen *pscreen = prsc->screen;
+        struct vc4_bo *bo;

        if (miptree_debug) {
                fprintf(stderr, "alloc %p: size %d + offset %d -> %d\n",
@@ -51,12 +52,18 @@ vc4_resource_bo_alloc(struct vc4_resource *rsc)
                        rsc->cube_map_stride * (prsc->array_size - 1));
        }

-        vc4_bo_unreference(&rsc->bo);
-        rsc->bo = vc4_bo_alloc(vc4_screen(pscreen),
-                               rsc->slices[0].offset +
-                               rsc->slices[0].size +
-                               rsc->cube_map_stride * (prsc->array_size - 1),
-                               "resource");
+        bo = vc4_bo_alloc(vc4_screen(pscreen),
+                          rsc->slices[0].offset +
+                          rsc->slices[0].size +
+                          rsc->cube_map_stride * (prsc->array_size - 1),
+                          "resource");
+        if (bo) {
+                vc4_bo_unreference(&rsc->bo);
+                rsc->bo = bo;
+                return true;
+        } else {
+                return false;
+        }
 }

 static void
@@ -101,21 +108,27 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
        char *buf;

        if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
-                vc4_resource_bo_alloc(rsc);
+                if (vc4_resource_bo_alloc(rsc)) {

-                /* If it might be bound as one of our vertex buffers, make
-                 * sure we re-emit vertex buffer state.
-                 */
-                if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
-                        vc4->dirty |= VC4_DIRTY_VTXBUF;
+                        /* If it might be bound as one of our vertex buffers,
+                         * make sure we re-emit vertex buffer state.
+                         */
+                        if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
+                                vc4->dirty |= VC4_DIRTY_VTXBUF;
+                } else {
+                        /* If we failed to reallocate, flush everything so
+                         * that we don't violate any syncing requirements.
+                         */
+                        vc4_flush(pctx);
+                }
        } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                if (vc4_cl_references_bo(pctx, rsc->bo)) {
                        if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
                            prsc->last_level == 0 &&
                            prsc->width0 == box->width &&
                            prsc->height0 == box->height &&
-                            prsc->depth0 == box->depth) {
-                                vc4_resource_bo_alloc(rsc);
+                            prsc->depth0 == box->depth &&
+                            vc4_resource_bo_alloc(rsc)) {
                                if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
                                        vc4->dirty |= VC4_DIRTY_VTXBUF;
                        } else {
@@ -389,8 +402,7 @@ vc4_resource_create(struct pipe_screen *pscreen,
                rsc->vc4_format = get_resource_texture_format(prsc);

        vc4_setup_slices(rsc);
-        vc4_resource_bo_alloc(rsc);
-        if (!rsc->bo)
+        if (!vc4_resource_bo_alloc(rsc))
                goto fail;

        return prsc;
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -334,6 +334,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
                return VC4_MAX_TEXTURE_SAMPLERS;
        case PIPE_SHADER_CAP_PREFERRED_IR:
                return PIPE_SHADER_IR_TGSI;
+	case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+		return 32;
        default:
                fprintf(stderr, "unknown shader param %d\n", param);
                return 0;
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -581,6 +581,10 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;

                prsc = vc4_resource_create(pctx->screen, &tmpl);
+                if (!prsc) {
+                        free(so);
+                        return NULL;
+                }
                rsc = vc4_resource(prsc);
                clone = vc4_resource(prsc);
                clone->shadow_parent = &shadow_parent->base.b;
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -674,7 +674,8 @@ enum pipe_shader_cap
   PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
   PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
   PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
-   PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
+   PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
+   PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
 };

 /**
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -188,10 +188,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
       * may occur as the stvis->color_format.
       */
      switch(format) {
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
+      case PIPE_FORMAT_BGRA8888_UNORM:
 	 depth = 32;
 	 break;
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_BGRX8888_UNORM:
 	 depth = 24;
 	 break;
      case PIPE_FORMAT_B5G6R5_UNORM:
@@ -261,13 +261,13 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable,
      case PIPE_FORMAT_B5G6R5_UNORM:
         image_format = __DRI_IMAGE_FORMAT_RGB565;
         break;
-      case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_BGRX8888_UNORM:
         image_format = __DRI_IMAGE_FORMAT_XRGB8888;
         break;
-      case PIPE_FORMAT_B8G8R8A8_UNORM:
+      case PIPE_FORMAT_BGRA8888_UNORM:
         image_format = __DRI_IMAGE_FORMAT_ARGB8888;
         break;
-      case PIPE_FORMAT_R8G8B8A8_UNORM:
+      case PIPE_FORMAT_RGBA8888_UNORM:
         image_format = __DRI_IMAGE_FORMAT_ABGR8888;
         break;
      default:
@@ -314,10 +314,10 @@ dri2_allocate_buffer(__DRIscreen *sPriv,

   switch (format) {
      case 32:
-         pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+         pf = PIPE_FORMAT_BGRA8888_UNORM;
         break;
      case 24:
-         pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+         pf = PIPE_FORMAT_BGRX8888_UNORM;
         break;
      case 16:
         pf = PIPE_FORMAT_Z16_UNORM;
@@ -724,13 +724,13 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
      pf = PIPE_FORMAT_B5G6R5_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_XRGB8888:
-      pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+      pf = PIPE_FORMAT_BGRX8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ARGB8888:
-      pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+      pf = PIPE_FORMAT_BGRA8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ABGR8888:
-      pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+      pf = PIPE_FORMAT_RGBA8888_UNORM;
      break;
   default:
      pf = PIPE_FORMAT_NONE;
@@ -845,13 +845,13 @@ dri2_create_image(__DRIscreen *_screen,
      pf = PIPE_FORMAT_B5G6R5_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_XRGB8888:
-      pf = PIPE_FORMAT_B8G8R8X8_UNORM;
+      pf = PIPE_FORMAT_BGRX8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ARGB8888:
-      pf = PIPE_FORMAT_B8G8R8A8_UNORM;
+      pf = PIPE_FORMAT_BGRA8888_UNORM;
      break;
   case __DRI_IMAGE_FORMAT_ABGR8888:
-      pf = PIPE_FORMAT_R8G8B8A8_UNORM;
+      pf = PIPE_FORMAT_RGBA8888_UNORM;
      break;
   default:
      pf = PIPE_FORMAT_NONE;
@@ -1293,6 +1293,7 @@ dri2_load_opencl_interop(struct dri_screen *screen)
 }

 struct dri2_fence {
+   struct dri_screen *driscreen;
   struct pipe_fence_handle *pipe_fence;
   void *cl_event;
 };
@@ -1313,6 +1314,7 @@ dri2_create_fence(__DRIcontext *_ctx)
      return NULL;
   }

+   fence->driscreen = dri_screen(_ctx->driScreenPriv);
   return fence;
 }

@@ -1336,6 +1338,7 @@ dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event)
      return NULL;
   }

+   fence->driscreen = driscreen;
   return fence;
 }

@@ -1360,9 +1363,9 @@ static GLboolean
 dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
                      uint64_t timeout)
 {
-   struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv);
-   struct pipe_screen *screen = driscreen->base.screen;
   struct dri2_fence *fence = (struct dri2_fence*)_fence;
+   struct dri_screen *driscreen = fence->driscreen;
+   struct pipe_screen *screen = driscreen->base.screen;

   /* No need to flush. The context was flushed when the fence was created. */

--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -231,11 +231,11 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
      if (format == __DRI_TEXTURE_FORMAT_RGB)  {
         /* only need to cover the formats recognized by dri_fill_st_visual */
         switch (internal_format) {
-         case PIPE_FORMAT_B8G8R8A8_UNORM:
-            internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
+         case PIPE_FORMAT_BGRA8888_UNORM:
+            internal_format = PIPE_FORMAT_BGRX8888_UNORM;
            break;
-         case PIPE_FORMAT_A8R8G8B8_UNORM:
-            internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
+         case PIPE_FORMAT_ARGB8888_UNORM:
+            internal_format = PIPE_FORMAT_XRGB8888_UNORM;
            break;
         default:
            break;
--- a/src/gallium/state_trackers/omx/vid_dec_h264.c
+++ b/src/gallium/state_trackers/omx/vid_dec_h264.c
@@ -753,10 +753,14 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
         priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
      }

-      priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
-      priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
-      if (!priv->picture.h264.field_pic_flag)
-         priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom;
+      if (!priv->picture.h264.field_pic_flag) {
+         priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+         priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] +
+                                          priv->codec_data.h264.delta_pic_order_cnt_bottom;
+      } else if (!priv->picture.h264.bottom_field_flag)
+         priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
+      else
+         priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;

   } else if (sps->pic_order_cnt_type == 1) {
      unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -73,6 +73,9 @@ vlVaBufferSetNumElements(VADriverContextP ctx, VABufferID buf_id,
      return VA_STATUS_ERROR_INVALID_CONTEXT;

   buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id);
+   if (!buf)
+      return VA_STATUS_ERROR_INVALID_BUFFER;
+
   buf->data = REALLOC(buf->data, buf->size * buf->num_elements,
                       buf->size * num_elements);
   buf->num_elements = num_elements;
@@ -91,6 +94,9 @@ vlVaMapBuffer(VADriverContextP ctx, VABufferID buf_id, void **pbuff)
   if (!ctx)
      return VA_STATUS_ERROR_INVALID_CONTEXT;

+   if (!pbuff)
+      return VA_STATUS_ERROR_INVALID_PARAMETER;
+
   buf = handle_table_get(VL_VA_DRIVER(ctx)->htab, buf_id);
   if (!buf)
      return VA_STATUS_ERROR_INVALID_BUFFER;
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -116,7 +116,7 @@ vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int heig
   img->width = width;
   img->height = height;
   w = align(width, 2);
-   h = align(width, 2);
+   h = align(height, 2);

   switch (format->fourcc) {
   case VA_FOURCC('N','V','1','2'):
@@ -332,13 +332,20 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, VAImageID image,
   if (format == PIPE_FORMAT_NONE)
      return VA_STATUS_ERROR_OPERATION_FAILED;

-   if (surf->buffer == NULL || format != surf->buffer->buffer_format) {
-      if (surf->buffer)
-         surf->buffer->destroy(surf->buffer);
+   if (format != surf->buffer->buffer_format) {
+      struct pipe_video_buffer *tmp_buf;
+      enum pipe_format old_surf_format = surf->templat.buffer_format;
+
      surf->templat.buffer_format = format;
-      surf->buffer = drv->pipe->create_video_buffer(drv->pipe, &surf->templat);
-      if (!surf->buffer)
-         return VA_STATUS_ERROR_ALLOCATION_FAILED;
+      tmp_buf = drv->pipe->create_video_buffer(drv->pipe, &surf->templat);
+
+      if (!tmp_buf) {
+          surf->templat.buffer_format = old_surf_format;
+          return VA_STATUS_ERROR_ALLOCATION_FAILED;
+      }
+
+      surf->buffer->destroy(surf->buffer);
+      surf->buffer = tmp_buf;
   }

   views = surf->buffer->get_sampler_view_planes(surf->buffer);
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -58,7 +58,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
      return VA_STATUS_ERROR_INVALID_SURFACE;

   context->target = surf->buffer;
-   context->decoder->begin_frame(context->decoder, context->target, NULL);
+   context->decoder->begin_frame(context->decoder, context->target, &context->desc.base);

   return VA_STATUS_SUCCESS;
 }
@@ -500,8 +500,10 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
          bufHasStartcode(buf, 0x0000010b, 32))
         break;

+      if (context->decoder->profile == PIPE_VIDEO_PROFILE_VC1_ADVANCED) {
         buffers[num_buffers] = (void *const)&start_code_vc1;
         sizes[num_buffers++] = sizeof(start_code_vc1);
+      }
      break;
   case PIPE_VIDEO_FORMAT_MPEG4:
      if (bufHasStartcode(buf, 0x000001, 24))
@@ -517,7 +519,7 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
   buffers[num_buffers] = buf->data;
   sizes[num_buffers] = buf->size;
   ++num_buffers;
-   context->decoder->decode_bitstream(context->decoder, context->target, NULL,
+   context->decoder->decode_bitstream(context->decoder, context->target, &context->desc.base,
      num_buffers, (const void * const*)buffers, sizes);
 }

--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -35,7 +35,8 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
 	-lclangEdit \
 	-lclangLex \
 	-lclangBasic \
-	$(LLVM_LIBS)
+	$(LLVM_LIBS) \
+	$(PTHREAD_LIBS)

 nodist_EXTRA_lib@OPENCL_LIBNAME@_la_SOURCES = dummy.cpp
 lib@OPENCL_LIBNAME@_la_SOURCES =
--- a/src/gallium/targets/osmesa/osmesa.def
+++ b/src/gallium/targets/osmesa/osmesa.def
@@ -14,3 +14,340 @@ EXPORTS
 	OSMesaGetProcAddress
 	OSMesaColorClamp
 	OSMesaPostprocess
+	glAccum
+	glAlphaFunc
+	glAreTexturesResident
+	glArrayElement
+	glBegin
+	glBindTexture
+	glBitmap
+	glBlendFunc
+	glCallList
+	glCallLists
+	glClear
+	glClearAccum
+	glClearColor
+	glClearDepth
+	glClearIndex
+	glClearStencil
+	glClipPlane
+	glColor3b
+	glColor3bv
+	glColor3d
+	glColor3dv
+	glColor3f
+	glColor3fv
+	glColor3i
+	glColor3iv
+	glColor3s
+	glColor3sv
+	glColor3ub
+	glColor3ubv
+	glColor3ui
+	glColor3uiv
+	glColor3us
+	glColor3usv
+	glColor4b
+	glColor4bv
+	glColor4d
+	glColor4dv
+	glColor4f
+	glColor4fv
+	glColor4i
+	glColor4iv
+	glColor4s
+	glColor4sv
+	glColor4ub
+	glColor4ubv
+	glColor4ui
+	glColor4uiv
+	glColor4us
+	glColor4usv
+	glColorMask
+	glColorMaterial
+	glColorPointer
+	glCopyPixels
+	glCopyTexImage1D
+	glCopyTexImage2D
+	glCopyTexSubImage1D
+	glCopyTexSubImage2D
+	glCullFace
+;	glDebugEntry
+	glDeleteLists
+	glDeleteTextures
+	glDepthFunc
+	glDepthMask
+	glDepthRange
+	glDisable
+	glDisableClientState
+	glDrawArrays
+	glDrawBuffer
+	glDrawElements
+	glDrawPixels
+	glEdgeFlag
+	glEdgeFlagPointer
+	glEdgeFlagv
+	glEnable
+	glEnableClientState
+	glEnd
+	glEndList
+	glEvalCoord1d
+	glEvalCoord1dv
+	glEvalCoord1f
+	glEvalCoord1fv
+	glEvalCoord2d
+	glEvalCoord2dv
+	glEvalCoord2f
+	glEvalCoord2fv
+	glEvalMesh1
+	glEvalMesh2
+	glEvalPoint1
+	glEvalPoint2
+	glFeedbackBuffer
+	glFinish
+	glFlush
+	glFogf
+	glFogfv
+	glFogi
+	glFogiv
+	glFrontFace
+	glFrustum
+	glGenLists
+	glGenTextures
+	glGetBooleanv
+	glGetClipPlane
+	glGetDoublev
+	glGetError
+	glGetFloatv
+	glGetIntegerv
+	glGetLightfv
+	glGetLightiv
+	glGetMapdv
+	glGetMapfv
+	glGetMapiv
+	glGetMaterialfv
+	glGetMaterialiv
+	glGetPixelMapfv
+	glGetPixelMapuiv
+	glGetPixelMapusv
+	glGetPointerv
+	glGetPolygonStipple
+	glGetString
+	glGetTexEnvfv
+	glGetTexEnviv
+	glGetTexGendv
+	glGetTexGenfv
+	glGetTexGeniv
+	glGetTexImage
+	glGetTexLevelParameterfv
+	glGetTexLevelParameteriv
+	glGetTexParameterfv
+	glGetTexParameteriv
+	glHint
+	glIndexMask
+	glIndexPointer
+	glIndexd
+	glIndexdv
+	glIndexf
+	glIndexfv
+	glIndexi
+	glIndexiv
+	glIndexs
+	glIndexsv
+	glIndexub
+	glIndexubv
+	glInitNames
+	glInterleavedArrays
+	glIsEnabled
+	glIsList
+	glIsTexture
+	glLightModelf
+	glLightModelfv
+	glLightModeli
+	glLightModeliv
+	glLightf
+	glLightfv
+	glLighti
+	glLightiv
+	glLineStipple
+	glLineWidth
+	glListBase
+	glLoadIdentity
+	glLoadMatrixd
+	glLoadMatrixf
+	glLoadName
+	glLogicOp
+	glMap1d
+	glMap1f
+	glMap2d
+	glMap2f
+	glMapGrid1d
+	glMapGrid1f
+	glMapGrid2d
+	glMapGrid2f
+	glMaterialf
+	glMaterialfv
+	glMateriali
+	glMaterialiv
+	glMatrixMode
+	glMultMatrixd
+	glMultMatrixf
+	glNewList
+	glNormal3b
+	glNormal3bv
+	glNormal3d
+	glNormal3dv
+	glNormal3f
+	glNormal3fv
+	glNormal3i
+	glNormal3iv
+	glNormal3s
+	glNormal3sv
+	glNormalPointer
+	glOrtho
+	glPassThrough
+	glPixelMapfv
+	glPixelMapuiv
+	glPixelMapusv
+	glPixelStoref
+	glPixelStorei
+	glPixelTransferf
+	glPixelTransferi
+	glPixelZoom
+	glPointSize
+	glPolygonMode
+	glPolygonOffset
+	glPolygonStipple
+	glPopAttrib
+	glPopClientAttrib
+	glPopMatrix
+	glPopName
+	glPrioritizeTextures
+	glPushAttrib
+	glPushClientAttrib
+	glPushMatrix
+	glPushName
+	glRasterPos2d
+	glRasterPos2dv
+	glRasterPos2f
+	glRasterPos2fv
+	glRasterPos2i
+	glRasterPos2iv
+	glRasterPos2s
+	glRasterPos2sv
+	glRasterPos3d
+	glRasterPos3dv
+	glRasterPos3f
+	glRasterPos3fv
+	glRasterPos3i
+	glRasterPos3iv
+	glRasterPos3s
+	glRasterPos3sv
+	glRasterPos4d
+	glRasterPos4dv
+	glRasterPos4f
+	glRasterPos4fv
+	glRasterPos4i
+	glRasterPos4iv
+	glRasterPos4s
+	glRasterPos4sv
+	glReadBuffer
+	glReadPixels
+	glRectd
+	glRectdv
+	glRectf
+	glRectfv
+	glRecti
+	glRectiv
+	glRects
+	glRectsv
+	glRenderMode
+	glRotated
+	glRotatef
+	glScaled
+	glScalef
+	glScissor
+	glSelectBuffer
+	glShadeModel
+	glStencilFunc
+	glStencilMask
+	glStencilOp
+	glTexCoord1d
+	glTexCoord1dv
+	glTexCoord1f
+	glTexCoord1fv
+	glTexCoord1i
+	glTexCoord1iv
+	glTexCoord1s
+	glTexCoord1sv
+	glTexCoord2d
+	glTexCoord2dv
+	glTexCoord2f
+	glTexCoord2fv
+	glTexCoord2i
+	glTexCoord2iv
+	glTexCoord2s
+	glTexCoord2sv
+	glTexCoord3d
+	glTexCoord3dv
+	glTexCoord3f
+	glTexCoord3fv
+	glTexCoord3i
+	glTexCoord3iv
+	glTexCoord3s
+	glTexCoord3sv
+	glTexCoord4d
+	glTexCoord4dv
+	glTexCoord4f
+	glTexCoord4fv
+	glTexCoord4i
+	glTexCoord4iv
+	glTexCoord4s
+	glTexCoord4sv
+	glTexCoordPointer
+	glTexEnvf
+	glTexEnvfv
+	glTexEnvi
+	glTexEnviv
+	glTexGend
+	glTexGendv
+	glTexGenf
+	glTexGenfv
+	glTexGeni
+	glTexGeniv
+	glTexImage1D
+	glTexImage2D
+	glTexParameterf
+	glTexParameterfv
+	glTexParameteri
+	glTexParameteriv
+	glTexSubImage1D
+	glTexSubImage2D
+	glTranslated
+	glTranslatef
+	glVertex2d
+	glVertex2dv
+	glVertex2f
+	glVertex2fv
+	glVertex2i
+	glVertex2iv
+	glVertex2s
+	glVertex2sv
+	glVertex3d
+	glVertex3dv
+	glVertex3f
+	glVertex3fv
+	glVertex3i
+	glVertex3iv
+	glVertex3s
+	glVertex3sv
+	glVertex4d
+	glVertex4dv
+	glVertex4f
+	glVertex4fv
+	glVertex4i
+	glVertex4iv
+	glVertex4s
+	glVertex4sv
+	glVertexPointer
+	glViewport
--- a/src/gallium/targets/osmesa/osmesa.mingw.def
+++ b/src/gallium/targets/osmesa/osmesa.mingw.def
@@ -11,3 +11,340 @@ EXPORTS
 	OSMesaGetProcAddress = OSMesaGetProcAddress@4
 	OSMesaColorClamp = OSMesaColorClamp@4
 	OSMesaPostprocess = OSMesaPostprocess@12
+	glAccum = glAccum@8
+	glAlphaFunc = glAlphaFunc@8
+	glAreTexturesResident = glAreTexturesResident@12
+	glArrayElement = glArrayElement@4
+	glBegin = glBegin@4
+	glBindTexture = glBindTexture@8
+	glBitmap = glBitmap@28
+	glBlendFunc = glBlendFunc@8
+	glCallList = glCallList@4
+	glCallLists = glCallLists@12
+	glClear = glClear@4
+	glClearAccum = glClearAccum@16
+	glClearColor = glClearColor@16
+	glClearDepth = glClearDepth@8
+	glClearIndex = glClearIndex@4
+	glClearStencil = glClearStencil@4
+	glClipPlane = glClipPlane@8
+	glColor3b = glColor3b@12
+	glColor3bv = glColor3bv@4
+	glColor3d = glColor3d@24
+	glColor3dv = glColor3dv@4
+	glColor3f = glColor3f@12
+	glColor3fv = glColor3fv@4
+	glColor3i = glColor3i@12
+	glColor3iv = glColor3iv@4
+	glColor3s = glColor3s@12
+	glColor3sv = glColor3sv@4
+	glColor3ub = glColor3ub@12
+	glColor3ubv = glColor3ubv@4
+	glColor3ui = glColor3ui@12
+	glColor3uiv = glColor3uiv@4
+	glColor3us = glColor3us@12
+	glColor3usv = glColor3usv@4
+	glColor4b = glColor4b@16
+	glColor4bv = glColor4bv@4
+	glColor4d = glColor4d@32
+	glColor4dv = glColor4dv@4
+	glColor4f = glColor4f@16
+	glColor4fv = glColor4fv@4
+	glColor4i = glColor4i@16
+	glColor4iv = glColor4iv@4
+	glColor4s = glColor4s@16
+	glColor4sv = glColor4sv@4
+	glColor4ub = glColor4ub@16
+	glColor4ubv = glColor4ubv@4
+	glColor4ui = glColor4ui@16
+	glColor4uiv = glColor4uiv@4
+	glColor4us = glColor4us@16
+	glColor4usv = glColor4usv@4
+	glColorMask = glColorMask@16
+	glColorMaterial = glColorMaterial@8
+	glColorPointer = glColorPointer@16
+	glCopyPixels = glCopyPixels@20
+	glCopyTexImage1D = glCopyTexImage1D@28
+	glCopyTexImage2D = glCopyTexImage2D@32
+	glCopyTexSubImage1D = glCopyTexSubImage1D@24
+	glCopyTexSubImage2D = glCopyTexSubImage2D@32
+	glCullFace = glCullFace@4
+;	glDebugEntry = glDebugEntry@8
+	glDeleteLists = glDeleteLists@8
+	glDeleteTextures = glDeleteTextures@8
+	glDepthFunc = glDepthFunc@4
+	glDepthMask = glDepthMask@4
+	glDepthRange = glDepthRange@16
+	glDisable = glDisable@4
+	glDisableClientState = glDisableClientState@4
+	glDrawArrays = glDrawArrays@12
+	glDrawBuffer = glDrawBuffer@4
+	glDrawElements = glDrawElements@16
+	glDrawPixels = glDrawPixels@20
+	glEdgeFlag = glEdgeFlag@4
+	glEdgeFlagPointer = glEdgeFlagPointer@8
+	glEdgeFlagv = glEdgeFlagv@4
+	glEnable = glEnable@4
+	glEnableClientState = glEnableClientState@4
+	glEnd = glEnd@0
+	glEndList = glEndList@0
+	glEvalCoord1d = glEvalCoord1d@8
+	glEvalCoord1dv = glEvalCoord1dv@4
+	glEvalCoord1f = glEvalCoord1f@4
+	glEvalCoord1fv = glEvalCoord1fv@4
+	glEvalCoord2d = glEvalCoord2d@16
+	glEvalCoord2dv = glEvalCoord2dv@4
+	glEvalCoord2f = glEvalCoord2f@8
+	glEvalCoord2fv = glEvalCoord2fv@4
+	glEvalMesh1 = glEvalMesh1@12
+	glEvalMesh2 = glEvalMesh2@20
+	glEvalPoint1 = glEvalPoint1@4
+	glEvalPoint2 = glEvalPoint2@8
+	glFeedbackBuffer = glFeedbackBuffer@12
+	glFinish = glFinish@0
+	glFlush = glFlush@0
+	glFogf = glFogf@8
+	glFogfv = glFogfv@8
+	glFogi = glFogi@8
+	glFogiv = glFogiv@8
+	glFrontFace = glFrontFace@4
+	glFrustum = glFrustum@48
+	glGenLists = glGenLists@4
+	glGenTextures = glGenTextures@8
+	glGetBooleanv = glGetBooleanv@8
+	glGetClipPlane = glGetClipPlane@8
+	glGetDoublev = glGetDoublev@8
+	glGetError = glGetError@0
+	glGetFloatv = glGetFloatv@8
+	glGetIntegerv = glGetIntegerv@8
+	glGetLightfv = glGetLightfv@12
+	glGetLightiv = glGetLightiv@12
+	glGetMapdv = glGetMapdv@12
+	glGetMapfv = glGetMapfv@12
+	glGetMapiv = glGetMapiv@12
+	glGetMaterialfv = glGetMaterialfv@12
+	glGetMaterialiv = glGetMaterialiv@12
+	glGetPixelMapfv = glGetPixelMapfv@8
+	glGetPixelMapuiv = glGetPixelMapuiv@8
+	glGetPixelMapusv = glGetPixelMapusv@8
+	glGetPointerv = glGetPointerv@8
+	glGetPolygonStipple = glGetPolygonStipple@4
+	glGetString = glGetString@4
+	glGetTexEnvfv = glGetTexEnvfv@12
+	glGetTexEnviv = glGetTexEnviv@12
+	glGetTexGendv = glGetTexGendv@12
+	glGetTexGenfv = glGetTexGenfv@12
+	glGetTexGeniv = glGetTexGeniv@12
+	glGetTexImage = glGetTexImage@20
+	glGetTexLevelParameterfv = glGetTexLevelParameterfv@16
+	glGetTexLevelParameteriv = glGetTexLevelParameteriv@16
+	glGetTexParameterfv = glGetTexParameterfv@12
+	glGetTexParameteriv = glGetTexParameteriv@12
+	glHint = glHint@8
+	glIndexMask = glIndexMask@4
+	glIndexPointer = glIndexPointer@12
+	glIndexd = glIndexd@8
+	glIndexdv = glIndexdv@4
+	glIndexf = glIndexf@4
+	glIndexfv = glIndexfv@4
+	glIndexi = glIndexi@4
+	glIndexiv = glIndexiv@4
+	glIndexs = glIndexs@4
+	glIndexsv = glIndexsv@4
+	glIndexub = glIndexub@4
+	glIndexubv = glIndexubv@4
+	glInitNames = glInitNames@0
+	glInterleavedArrays = glInterleavedArrays@12
+	glIsEnabled = glIsEnabled@4
+	glIsList = glIsList@4
+	glIsTexture = glIsTexture@4
+	glLightModelf = glLightModelf@8
+	glLightModelfv = glLightModelfv@8
+	glLightModeli = glLightModeli@8
+	glLightModeliv = glLightModeliv@8
+	glLightf = glLightf@12
+	glLightfv = glLightfv@12
+	glLighti = glLighti@12
+	glLightiv = glLightiv@12
+	glLineStipple = glLineStipple@8
+	glLineWidth = glLineWidth@4
+	glListBase = glListBase@4
+	glLoadIdentity = glLoadIdentity@0
+	glLoadMatrixd = glLoadMatrixd@4
+	glLoadMatrixf = glLoadMatrixf@4
+	glLoadName = glLoadName@4
+	glLogicOp = glLogicOp@4
+	glMap1d = glMap1d@32
+	glMap1f = glMap1f@24
+	glMap2d = glMap2d@56
+	glMap2f = glMap2f@40
+	glMapGrid1d = glMapGrid1d@20
+	glMapGrid1f = glMapGrid1f@12
+	glMapGrid2d = glMapGrid2d@40
+	glMapGrid2f = glMapGrid2f@24
+	glMaterialf = glMaterialf@12
+	glMaterialfv = glMaterialfv@12
+	glMateriali = glMateriali@12
+	glMaterialiv = glMaterialiv@12
+	glMatrixMode = glMatrixMode@4
+	glMultMatrixd = glMultMatrixd@4
+	glMultMatrixf = glMultMatrixf@4
+	glNewList = glNewList@8
+	glNormal3b = glNormal3b@12
+	glNormal3bv = glNormal3bv@4
+	glNormal3d = glNormal3d@24
+	glNormal3dv = glNormal3dv@4
+	glNormal3f = glNormal3f@12
+	glNormal3fv = glNormal3fv@4
+	glNormal3i = glNormal3i@12
+	glNormal3iv = glNormal3iv@4
+	glNormal3s = glNormal3s@12
+	glNormal3sv = glNormal3sv@4
+	glNormalPointer = glNormalPointer@12
+	glOrtho = glOrtho@48
+	glPassThrough = glPassThrough@4
+	glPixelMapfv = glPixelMapfv@12
+	glPixelMapuiv = glPixelMapuiv@12
+	glPixelMapusv = glPixelMapusv@12
+	glPixelStoref = glPixelStoref@8
+	glPixelStorei = glPixelStorei@8
+	glPixelTransferf = glPixelTransferf@8
+	glPixelTransferi = glPixelTransferi@8
+	glPixelZoom = glPixelZoom@8
+	glPointSize = glPointSize@4
+	glPolygonMode = glPolygonMode@8
+	glPolygonOffset = glPolygonOffset@8
+	glPolygonStipple = glPolygonStipple@4
+	glPopAttrib = glPopAttrib@0
+	glPopClientAttrib = glPopClientAttrib@0
+	glPopMatrix = glPopMatrix@0
+	glPopName = glPopName@0
+	glPrioritizeTextures = glPrioritizeTextures@12
+	glPushAttrib = glPushAttrib@4
+	glPushClientAttrib = glPushClientAttrib@4
+	glPushMatrix = glPushMatrix@0
+	glPushName = glPushName@4
+	glRasterPos2d = glRasterPos2d@16
+	glRasterPos2dv = glRasterPos2dv@4
+	glRasterPos2f = glRasterPos2f@8
+	glRasterPos2fv = glRasterPos2fv@4
+	glRasterPos2i = glRasterPos2i@8
+	glRasterPos2iv = glRasterPos2iv@4
+	glRasterPos2s = glRasterPos2s@8
+	glRasterPos2sv = glRasterPos2sv@4
+	glRasterPos3d = glRasterPos3d@24
+	glRasterPos3dv = glRasterPos3dv@4
+	glRasterPos3f = glRasterPos3f@12
+	glRasterPos3fv = glRasterPos3fv@4
+	glRasterPos3i = glRasterPos3i@12
+	glRasterPos3iv = glRasterPos3iv@4
+	glRasterPos3s = glRasterPos3s@12
+	glRasterPos3sv = glRasterPos3sv@4
+	glRasterPos4d = glRasterPos4d@32
+	glRasterPos4dv = glRasterPos4dv@4
+	glRasterPos4f = glRasterPos4f@16
+	glRasterPos4fv = glRasterPos4fv@4
+	glRasterPos4i = glRasterPos4i@16
+	glRasterPos4iv = glRasterPos4iv@4
+	glRasterPos4s = glRasterPos4s@16
+	glRasterPos4sv = glRasterPos4sv@4
+	glReadBuffer = glReadBuffer@4
+	glReadPixels = glReadPixels@28
+	glRectd = glRectd@32
+	glRectdv = glRectdv@8
+	glRectf = glRectf@16
+	glRectfv = glRectfv@8
+	glRecti = glRecti@16
+	glRectiv = glRectiv@8
+	glRects = glRects@16
+	glRectsv = glRectsv@8
+	glRenderMode = glRenderMode@4
+	glRotated = glRotated@32
+	glRotatef = glRotatef@16
+	glScaled = glScaled@24
+	glScalef = glScalef@12
+	glScissor = glScissor@16
+	glSelectBuffer = glSelectBuffer@8
+	glShadeModel = glShadeModel@4
+	glStencilFunc = glStencilFunc@12
+	glStencilMask = glStencilMask@4
+	glStencilOp = glStencilOp@12
+	glTexCoord1d = glTexCoord1d@8
+	glTexCoord1dv = glTexCoord1dv@4
+	glTexCoord1f = glTexCoord1f@4
+	glTexCoord1fv = glTexCoord1fv@4
+	glTexCoord1i = glTexCoord1i@4
+	glTexCoord1iv = glTexCoord1iv@4
+	glTexCoord1s = glTexCoord1s@4
+	glTexCoord1sv = glTexCoord1sv@4
+	glTexCoord2d = glTexCoord2d@16
+	glTexCoord2dv = glTexCoord2dv@4
+	glTexCoord2f = glTexCoord2f@8
+	glTexCoord2fv = glTexCoord2fv@4
+	glTexCoord2i = glTexCoord2i@8
+	glTexCoord2iv = glTexCoord2iv@4
+	glTexCoord2s = glTexCoord2s@8
+	glTexCoord2sv = glTexCoord2sv@4
+	glTexCoord3d = glTexCoord3d@24
+	glTexCoord3dv = glTexCoord3dv@4
+	glTexCoord3f = glTexCoord3f@12
+	glTexCoord3fv = glTexCoord3fv@4
+	glTexCoord3i = glTexCoord3i@12
+	glTexCoord3iv = glTexCoord3iv@4
+	glTexCoord3s = glTexCoord3s@12
+	glTexCoord3sv = glTexCoord3sv@4
+	glTexCoord4d = glTexCoord4d@32
+	glTexCoord4dv = glTexCoord4dv@4
+	glTexCoord4f = glTexCoord4f@16
+	glTexCoord4fv = glTexCoord4fv@4
+	glTexCoord4i = glTexCoord4i@16
+	glTexCoord4iv = glTexCoord4iv@4
+	glTexCoord4s = glTexCoord4s@16
+	glTexCoord4sv = glTexCoord4sv@4
+	glTexCoordPointer = glTexCoordPointer@16
+	glTexEnvf = glTexEnvf@12
+	glTexEnvfv = glTexEnvfv@12
+	glTexEnvi = glTexEnvi@12
+	glTexEnviv = glTexEnviv@12
+	glTexGend = glTexGend@16
+	glTexGendv = glTexGendv@12
+	glTexGenf = glTexGenf@12
+	glTexGenfv = glTexGenfv@12
+	glTexGeni = glTexGeni@12
+	glTexGeniv = glTexGeniv@12
+	glTexImage1D = glTexImage1D@32
+	glTexImage2D = glTexImage2D@36
+	glTexParameterf = glTexParameterf@12
+	glTexParameterfv = glTexParameterfv@12
+	glTexParameteri = glTexParameteri@12
+	glTexParameteriv = glTexParameteriv@12
+	glTexSubImage1D = glTexSubImage1D@28
+	glTexSubImage2D = glTexSubImage2D@36
+	glTranslated = glTranslated@24
+	glTranslatef = glTranslatef@12
+	glVertex2d = glVertex2d@16
+	glVertex2dv = glVertex2dv@4
+	glVertex2f = glVertex2f@8
+	glVertex2fv = glVertex2fv@4
+	glVertex2i = glVertex2i@8
+	glVertex2iv = glVertex2iv@4
+	glVertex2s = glVertex2s@8
+	glVertex2sv = glVertex2sv@4
+	glVertex3d = glVertex3d@24
+	glVertex3dv = glVertex3dv@4
+	glVertex3f = glVertex3f@12
+	glVertex3fv = glVertex3fv@4
+	glVertex3i = glVertex3i@12
+	glVertex3iv = glVertex3iv@4
+	glVertex3s = glVertex3s@12
+	glVertex3sv = glVertex3sv@4
+	glVertex4d = glVertex4d@32
+	glVertex4dv = glVertex4dv@4
+	glVertex4f = glVertex4f@16
+	glVertex4fv = glVertex4fv@4
+	glVertex4i = glVertex4i@16
+	glVertex4iv = glVertex4iv@4
+	glVertex4s = glVertex4s@16
+	glVertex4sv = glVertex4sv@4
+	glVertexPointer = glVertexPointer@16
+	glViewport = glViewport@16
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_id.h
@@ -151,11 +151,15 @@ enum {

 /* CZ specific rev IDs */
 enum {
-	CZ_CARRIZO_A0      = 0x01,
+	CARRIZO_A0   = 0x01,
+    STONEY_A0    = 0x61,
 	CZ_UNKNOWN      = 0xFF
 };

 #define ASICREV_IS_CARRIZO(eChipRev) \
-	(eChipRev >= CARRIZO_A0)
+	((eChipRev >= CARRIZO_A0) && (eChipRev < STONEY_A0))
+
+#define ASICREV_IS_STONEY(eChipRev) \
+	((eChipRev >= STONEY_A0) && (eChipRev < CZ_UNKNOWN))

 #endif /* AMDGPU_ID_H */
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -226,7 +226,11 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
      break;
   case CHIP_CARRIZO:
      ws->family = FAMILY_CZ;
-      ws->rev_id = CZ_CARRIZO_A0;
+      ws->rev_id = CARRIZO_A0;
+      break;
+   case CHIP_STONEY:
+      ws->family = FAMILY_CZ;
+      ws->rev_id = STONEY_A0;
      break;
   case CHIP_FIJI:
      ws->family = FAMILY_VI;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -76,6 +76,9 @@ struct radeon_bomgr {
    bool va;
    uint64_t va_offset;
    struct list_head va_holes;
+
+    /* BO size alignment */
+    unsigned size_align;
 };

 static inline struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
@@ -164,8 +167,10 @@ static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size, ui
    struct radeon_bo_va_hole *hole, *n;
    uint64_t offset = 0, waste = 0;

-    alignment = MAX2(alignment, 4096);
-    size = align(size, 4096);
+    /* All VM address space holes will implicitly start aligned to the
+     * size alignment, so we don't need to sanitize the alignment here
+     */
+    size = align(size, mgr->size_align);

    pipe_mutex_lock(mgr->bo_va_mutex);
    /* first look for a hole */
@@ -222,7 +227,7 @@ static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t
 {
    struct radeon_bo_va_hole *hole;

-    size = align(size, 4096);
+    size = align(size, mgr->size_align);

    pipe_mutex_lock(mgr->bo_va_mutex);
    if ((va + size) == mgr->va_offset) {
@@ -333,9 +338,9 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
    pipe_mutex_destroy(bo->map_mutex);

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-        bo->rws->allocated_vram -= align(bo->base.size, 4096);
+        bo->rws->allocated_vram -= align(bo->base.size, mgr->size_align);
    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
-        bo->rws->allocated_gtt -= align(bo->base.size, 4096);
+        bo->rws->allocated_gtt -= align(bo->base.size, mgr->size_align);
    FREE(bo);
 }

@@ -620,9 +625,9 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
    }

    if (rdesc->initial_domains & RADEON_DOMAIN_VRAM)
-        rws->allocated_vram += align(size, 4096);
+        rws->allocated_vram += align(size, mgr->size_align);
    else if (rdesc->initial_domains & RADEON_DOMAIN_GTT)
-        rws->allocated_gtt += align(size, 4096);
+        rws->allocated_gtt += align(size, mgr->size_align);

    return &bo->base;
 }
@@ -696,6 +701,9 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
    mgr->va_offset = rws->va_start;
    list_inithead(&mgr->va_holes);

+    /* TTM aligns the BO size to the CPU page size */
+    mgr->size_align = sysconf(_SC_PAGESIZE);
+
    return &mgr->base;
 }

@@ -858,7 +866,7 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
     * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
     * like constant/uniform buffers, can benefit from better and more reuse.
     */
-    size = align(size, 4096);
+    size = align(size, mgr->size_align);

    /* Only set one usage bit each for domains and flags, or the cache manager
     * might consider different sets of domains / flags compatible
@@ -969,7 +977,7 @@ static struct pb_buffer *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
        pipe_mutex_unlock(mgr->bo_handles_mutex);
    }

-    ws->allocated_gtt += align(bo->base.size, 4096);
+    ws->allocated_gtt += align(bo->base.size, mgr->size_align);

    return (struct pb_buffer*)bo;
 }
@@ -1106,9 +1114,9 @@ done:
    bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

    if (bo->initial_domain & RADEON_DOMAIN_VRAM)
-        ws->allocated_vram += align(bo->base.size, 4096);
+        ws->allocated_vram += align(bo->base.size, mgr->size_align);
    else if (bo->initial_domain & RADEON_DOMAIN_GTT)
-        ws->allocated_gtt += align(bo->base.size, 4096);
+        ws->allocated_gtt += align(bo->base.size, mgr->size_align);

    return (struct pb_buffer*)bo;

--- a/src/gbm/main/gbm.h
+++ b/src/gbm/main/gbm.h
@@ -35,6 +35,7 @@ extern "C" {

 #define __GBM__ 1

+#include <stddef.h>
 #include <stdint.h>

 /**
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -62,6 +62,8 @@ public:
   virtual ir_rvalue *hir(exec_list *instructions,
 			  struct _mesa_glsl_parse_state *state);

+   virtual bool has_sequence_subexpression() const;
+
   /**
    * Retrieve the source location of an AST node
    *
@@ -221,6 +223,8 @@ public:
   virtual void hir_no_rvalue(exec_list *instructions,
                              struct _mesa_glsl_parse_state *state);

+   virtual bool has_sequence_subexpression() const;
+
   ir_rvalue *do_hir(exec_list *instructions,
                     struct _mesa_glsl_parse_state *state,
                     bool needs_rvalue);
@@ -299,6 +303,8 @@ public:
   virtual void hir_no_rvalue(exec_list *instructions,
                              struct _mesa_glsl_parse_state *state);

+   virtual bool has_sequence_subexpression() const;
+
 private:
   /**
    * Is this function call actually a constructor?
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -395,13 +395,54 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
      }
   }

-   /* If the function call is a constant expression, don't generate any
-    * instructions; just generate an ir_constant.
+   /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says:
    *
-    * Function calls were first allowed to be constant expressions in GLSL
-    * 1.20 and GLSL ES 3.00.
+    *     "Initializers for const declarations must be formed from literal
+    *     values, other const variables (not including function call
+    *     paramaters), or expressions of these.
+    *
+    *     Constructors may be used in such expressions, but function calls may
+    *     not."
+    *
+    * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says:
+    *
+    *     "A constant expression is one of
+    *
+    *         ...
+    *
+    *         - a built-in function call whose arguments are all constant
+    *           expressions, with the exception of the texture lookup
+    *           functions, the noise functions, and ftransform. The built-in
+    *           functions dFdx, dFdy, and fwidth must return 0 when evaluated
+    *           inside an initializer with an argument that is a constant
+    *           expression."
+    *
+    * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says:
+    *
+    *     "A constant expression is one of
+    *
+    *         ...
+    *
+    *         - a built-in function call whose arguments are all constant
+    *           expressions, with the exception of the texture lookup
+    *           functions."
+    *
+    * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says:
+    *
+    *     "A constant expression is one of
+    *
+    *         ...
+    *
+    *         - a built-in function call whose arguments are all constant
+    *           expressions, with the exception of the texture lookup
+    *           functions.  The built-in functions dFdx, dFdy, and fwidth must
+    *           return 0 when evaluated inside an initializer with an argument
+    *           that is a constant expression."
+    *
+    * If the function call is a constant expression, don't generate any
+    * instructions; just generate an ir_constant.
    */
-   if (state->is_version(120, 300)) {
+   if (state->is_version(120, 100)) {
      ir_constant *value = sig->constant_expression_value(actual_parameters, NULL);
      if (value != NULL) {
 	 return value;
@@ -1911,6 +1952,17 @@ ast_function_expression::hir(exec_list *instructions,
   unreachable("not reached");
 }

+bool
+ast_function_expression::has_sequence_subexpression() const
+{
+   foreach_list_typed(const ast_node, ast, link, &this->expressions) {
+      if (ast->has_sequence_subexpression())
+         return true;
+   }
+
+   return false;
+}
+
 ir_rvalue *
 ast_aggregate_initializer::hir(exec_list *instructions,
                               struct _mesa_glsl_parse_state *state)
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -482,18 +482,20 @@ bit_logic_result_type(const struct glsl_type *type_a,
 }

 static const struct glsl_type *
-modulus_result_type(const struct glsl_type *type_a,
-                    const struct glsl_type *type_b,
+modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
                    struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
 {
+   const glsl_type *type_a = value_a->type;
+   const glsl_type *type_b = value_b->type;
+
   if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) {
      return glsl_type::error_type;
   }

-   /* From GLSL 1.50 spec, page 56:
+   /* Section 5.9 (Expressions) of the GLSL 4.00 specification says:
+    *
    *    "The operator modulus (%) operates on signed or unsigned integers or
-    *    integer vectors. The operand types must both be signed or both be
-    *    unsigned."
+    *    integer vectors."
    */
   if (!type_a->is_integer()) {
      _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer");
@@ -503,11 +505,28 @@ modulus_result_type(const struct glsl_type *type_a,
      _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer");
      return glsl_type::error_type;
   }
-   if (type_a->base_type != type_b->base_type) {
+
+   /*    "If the fundamental types in the operands do not match, then the
+    *    conversions from section 4.1.10 "Implicit Conversions" are applied
+    *    to create matching types."
+    *
+    * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit
+    * int -> uint conversion rules.  Prior to that, there were no implicit
+    * conversions.  So it's harmless to apply them universally - no implicit
+    * conversions will exist.  If the types don't match, we'll receive false,
+    * and raise an error, satisfying the GLSL 1.50 spec, page 56:
+    *
+    *    "The operand types must both be signed or unsigned."
+    */
+   if (!apply_implicit_conversion(type_a, value_b, state) &&
+       !apply_implicit_conversion(type_b, value_a, state)) {
      _mesa_glsl_error(loc, state,
-                       "operands of %% must have the same base type");
+                       "could not implicitly convert operands to "
+                       "modulus (%%) operator");
      return glsl_type::error_type;
   }
+   type_a = value_a->type;
+   type_b = value_b->type;

   /*    "The operands cannot be vectors of differing size. If one operand is
    *    a scalar and the other vector, then the scalar is applied component-
@@ -939,6 +958,12 @@ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
   return NULL;
 }

+bool
+ast_node::has_sequence_subexpression() const
+{
+   return false;
+}
+
 void
 ast_function_expression::hir_no_rvalue(exec_list *instructions,
                                       struct _mesa_glsl_parse_state *state)
@@ -1261,7 +1286,7 @@ ast_expression::do_hir(exec_list *instructions,
      op[0] = this->subexpressions[0]->hir(instructions, state);
      op[1] = this->subexpressions[1]->hir(instructions, state);

-      type = modulus_result_type(op[0]->type, op[1]->type, state, & loc);
+      type = modulus_result_type(op[0], op[1], state, &loc);

      assert(operations[this->oper] == ir_binop_mod);

@@ -1508,7 +1533,7 @@ ast_expression::do_hir(exec_list *instructions,
      op[0] = this->subexpressions[0]->hir(instructions, state);
      op[1] = this->subexpressions[1]->hir(instructions, state);

-      type = modulus_result_type(op[0]->type, op[1]->type, state, & loc);
+      type = modulus_result_type(op[0], op[1], state, &loc);

      assert(operations[this->oper] == ir_binop_mod);

@@ -1850,6 +1875,80 @@ ast_expression::do_hir(exec_list *instructions,
   return result;
 }

+bool
+ast_expression::has_sequence_subexpression() const
+{
+   switch (this->oper) {
+   case ast_plus:
+   case ast_neg:
+   case ast_bit_not:
+   case ast_logic_not:
+   case ast_pre_inc:
+   case ast_pre_dec:
+   case ast_post_inc:
+   case ast_post_dec:
+      return this->subexpressions[0]->has_sequence_subexpression();
+
+   case ast_assign:
+   case ast_add:
+   case ast_sub:
+   case ast_mul:
+   case ast_div:
+   case ast_mod:
+   case ast_lshift:
+   case ast_rshift:
+   case ast_less:
+   case ast_greater:
+   case ast_lequal:
+   case ast_gequal:
+   case ast_nequal:
+   case ast_equal:
+   case ast_bit_and:
+   case ast_bit_xor:
+   case ast_bit_or:
+   case ast_logic_and:
+   case ast_logic_or:
+   case ast_logic_xor:
+   case ast_array_index:
+   case ast_mul_assign:
+   case ast_div_assign:
+   case ast_add_assign:
+   case ast_sub_assign:
+   case ast_mod_assign:
+   case ast_ls_assign:
+   case ast_rs_assign:
+   case ast_and_assign:
+   case ast_xor_assign:
+   case ast_or_assign:
+      return this->subexpressions[0]->has_sequence_subexpression() ||
+             this->subexpressions[1]->has_sequence_subexpression();
+
+   case ast_conditional:
+      return this->subexpressions[0]->has_sequence_subexpression() ||
+             this->subexpressions[1]->has_sequence_subexpression() ||
+             this->subexpressions[2]->has_sequence_subexpression();
+
+   case ast_sequence:
+      return true;
+
+   case ast_field_selection:
+   case ast_identifier:
+   case ast_int_constant:
+   case ast_uint_constant:
+   case ast_float_constant:
+   case ast_bool_constant:
+   case ast_double_constant:
+      return false;
+
+   case ast_aggregate:
+      unreachable("ast_aggregate: Should never get here.");
+
+   case ast_function_call:
+      unreachable("should be handled by ast_function_expression::hir");
+   }
+
+   return false;
+}

 ir_rvalue *
 ast_expression_statement::hir(exec_list *instructions,
@@ -3146,16 +3245,72 @@ process_initializer(ir_variable *var, ast_declaration *decl,

   /* Calculate the constant value if this is a const or uniform
    * declaration.
+    *
+    * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says:
+    *
+    *     "Declarations of globals without a storage qualifier, or with
+    *     just the const qualifier, may include initializers, in which case
+    *     they will be initialized before the first line of main() is
+    *     executed.  Such initializers must be a constant expression."
+    *
+    * The same section of the GLSL ES 3.00.4 spec has similar language.
    */
   if (type->qualifier.flags.q.constant
-       || type->qualifier.flags.q.uniform) {
+       || type->qualifier.flags.q.uniform
+       || (state->es_shader && state->current_function == NULL)) {
      ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
                                               lhs, rhs, true);
      if (new_rhs != NULL) {
         rhs = new_rhs;

+         /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec
+          * says:
+          *
+          *     "A constant expression is one of
+          *
+          *        ...
+          *
+          *        - an expression formed by an operator on operands that are
+          *          all constant expressions, including getting an element of
+          *          a constant array, or a field of a constant structure, or
+          *          components of a constant vector.  However, the sequence
+          *          operator ( , ) and the assignment operators ( =, +=, ...)
+          *          are not included in the operators that can create a
+          *          constant expression."
+          *
+          * Section 12.43 (Sequence operator and constant expressions) says:
+          *
+          *     "Should the following construct be allowed?
+          *
+          *         float a[2,3];
+          *
+          *     The expression within the brackets uses the sequence operator
+          *     (',') and returns the integer 3 so the construct is declaring
+          *     a single-dimensional array of size 3.  In some languages, the
+          *     construct declares a two-dimensional array.  It would be
+          *     preferable to make this construct illegal to avoid confusion.
+          *
+          *     One possibility is to change the definition of the sequence
+          *     operator so that it does not return a constant-expression and
+          *     hence cannot be used to declare an array size.
+          *
+          *     RESOLUTION: The result of a sequence operator is not a
+          *     constant-expression."
+          *
+          * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec
+          * contains language almost identical to the section 4.3.3 in the
+          * GLSL ES 3.00.4 spec.  This is a new limitation for these GLSL
+          * versions.
+          */
         ir_constant *constant_value = rhs->constant_expression_value();
-         if (!constant_value) {
+         if (!constant_value ||
+             (state->is_version(430, 300) &&
+              decl->initializer->has_sequence_subexpression())) {
+            const char *const variable_mode =
+               (type->qualifier.flags.q.constant)
+               ? "const"
+               : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
+
            /* If ARB_shading_language_420pack is enabled, initializers of
             * const-qualified local variables do not have to be constant
             * expressions. Const-qualified global variables must still be
@@ -3166,22 +3321,24 @@ process_initializer(ir_variable *var, ast_declaration *decl,
               _mesa_glsl_error(& initializer_loc, state,
                                "initializer of %s variable `%s' must be a "
                                "constant expression",
-                                (type->qualifier.flags.q.constant)
-                                ? "const" : "uniform",
+                                variable_mode,
                                decl->identifier);
               if (var->type->is_numeric()) {
                  /* Reduce cascading errors. */
-                  var->constant_value = ir_constant::zero(state, var->type);
+                  var->constant_value = type->qualifier.flags.q.constant
+                     ? ir_constant::zero(state, var->type) : NULL;
               }
            }
         } else {
            rhs = constant_value;
-            var->constant_value = constant_value;
+            var->constant_value = type->qualifier.flags.q.constant
+               ? constant_value : NULL;
         }
      } else {
         if (var->type->is_numeric()) {
            /* Reduce cascading errors. */
-            var->constant_value = ir_constant::zero(state, var->type);
+            var->constant_value = type->qualifier.flags.q.constant
+               ? ir_constant::zero(state, var->type) : NULL;
         }
      }
   }
--- a/src/glsl/link_uniform_initializers.cpp
+++ b/src/glsl/link_uniform_initializers.cpp
@@ -326,9 +326,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog,
            } else {
               assert(!"Explicit binding not on a sampler, UBO or atomic.");
            }
-         } else if (var->constant_value) {
+         } else if (var->constant_initializer) {
            linker::set_uniform_initializer(mem_ctx, prog, var->name,
-                                            var->type, var->constant_value,
+                                            var->type, var->constant_initializer,
                                            boolean_true);
         }
      }
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1629,7 +1629,7 @@ void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
 void nir_dump_cfg(nir_shader *shader, FILE *fp);

-void nir_split_var_copies(nir_shader *shader);
+bool nir_split_var_copies(nir_shader *shader);

 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
 void nir_lower_var_copies(nir_shader *shader);
@@ -1653,7 +1653,7 @@ void nir_lower_vars_to_ssa(nir_shader *shader);

 void nir_remove_dead_variables(nir_shader *shader);

-void nir_lower_vec_to_movs(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader);
 void nir_lower_alu_to_scalar(nir_shader *shader);
 void nir_lower_load_const_to_scalar(nir_shader *shader);

--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ b/src/glsl/nir/nir_lower_vars_to_ssa.c
@@ -455,7 +455,8 @@ lower_copies_to_load_store(struct deref_node *node,
         struct deref_node *arg_node =
            get_deref_node(copy->variables[i], state);

-         if (arg_node == NULL)
+         /* Only bother removing copy entries for other nodes */
+         if (arg_node == NULL || arg_node == node)
            continue;

         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
@@ -466,6 +467,8 @@ lower_copies_to_load_store(struct deref_node *node,
      nir_instr_remove(&copy->instr);
   }

+   node->copies = NULL;
+
   return true;
 }

--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .0.2
 .0.6