docs: add sha256 checksums for 11.2.2

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
docs: add release notes for 11.2.2
2016-05-09 14:21:01 +01:00 · 2016-05-09 13:54:59 +01:00 · 2016-05-09 13:52:16 +01:00 · 2016-05-05 14:06:59 +01:00 · 2016-05-05 14:06:21 +01:00 · 2016-05-05 14:02:04 +01:00
49 changed files with 689 additions and 282 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -54,6 +54,7 @@ LOCAL_CFLAGS += \
 	-DHAVE___BUILTIN_CLZLL \
 	-DHAVE___BUILTIN_UNREACHABLE \
 	-DHAVE_PTHREAD=1 \
+	-DHAVE_DLOPEN \
 	-fvisibility=hidden \
 	-Wno-sign-compare

@@ -65,7 +66,6 @@ ifeq ($(strip $(MESA_ENABLE_ASM)),true)
 ifeq ($(TARGET_ARCH),x86)
 LOCAL_CFLAGS += \
 	-DUSE_X86_ASM \
-	-DHAVE_DLOPEN \

 endif
 endif
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-11.2.1
+11.2.2
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,5 @@
+# Remove duplicate commit due to vulkan branch merge
+9e64a2a8e4821dd637daac54ba83895a490d4790 mesa: Fix generation of git_sha1.h.tmp for gitlinks
+
+# The functions/extension in question landed after the branchpoint.
+be5010c4b8635d0292404ac58ed0436ba6637579 glapi: fix parameter type for GetSamplerParameterIuivEXT() in es_EXT.xml
--- a/docs/relnotes/11.2.1.html
+++ b/docs/relnotes/11.2.1.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+cc2a024204564a71acc95cf262bf618fe49b1d77d351e5755eea705cadac5167  mesa-11.2.1.tar.gz
+a65207e9ae5c5f1c29f863c6a2cc98a7ab99762a24b82a248337f0ea9cfce01b  mesa-11.2.1.tar.xz
 </pre>


--- a/docs/relnotes/11.2.2.html
+++ b/docs/relnotes/11.2.2.html
@@ -0,0 +1,210 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.2.2 Release Notes / May 9, 2016</h1>
+
+<p>
+Mesa 11.2.2 is a bug fix release which fixes bugs found since the 11.2.1 release.
+</p>
+<p>
+Mesa 11.2.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+e2453014cd2cc5337a5180cdeffe8cf24fffbb83e20a96888e2b01df868eaae6  mesa-11.2.2.tar.gz
+40e148812388ec7c6d7b6657d5a16e2e8dabba8b97ddfceea5197947647bdfb4  mesa-11.2.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92850">Bug 92850</a> - Segfault loading War Thunder</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93767">Bug 93767</a> - Glitches with soft shadows and MSAA in Knights of the Old Republic 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94955">Bug 94955</a> - Uninitialized variables leads to random segfaults (valgrind log, apitrace attached)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94994">Bug 94994</a> - OSMesaGetProcAdress always fails on mangled OSMesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95026">Bug 95026</a> - Alien Isolation segfault after initial loading screen/video</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95133">Bug 95133</a> - X-COM Enemy Within crashes when entering tactical mission with Bonaire</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95164">Bug 95164</a> - GLSL compiler (linker I think) emits assertion upon call to glAttachShader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95251">Bug 95251</a> - vdpau decoder capabilities: not supported</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Boyuan Zhang (1):</p>
+<ul>
+  <li>radeon/uvd: alignment fix for decode message buffer</li>
+</ul>
+
+<p>Brian Paul (2):</p>
+<ul>
+  <li>st/mesa: fix sampler view leak in st_DrawAtlasBitmaps()</li>
+  <li>gallium/util: initialize pipe_framebuffer_state to zeros</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>dri: Fix robust context creation via EGL attribute</li>
+</ul>
+
+<p>Egbert Eich (1):</p>
+<ul>
+  <li>dri2: Check for dummyContext to see if the glx_context is valid</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.2.1</li>
+  <li>docs: update the sha256 checksums for 11.2.1</li>
+  <li>cherry-ignore: remove duplicate commit</li>
+  <li>cherry-ignore: ignore the GetSamplerParameterIuiv{EXT,OES} fixups</li>
+  <li>Update version to 11.2.2</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>vc4: Fix subimage accesses to LT textures.</li>
+  <li>vc4: Add support for rendering to cube map surfaces.</li>
+  <li>vc4: Fix tests for format supported with nr_samples == 1.</li>
+  <li>vc4: Make sure we recompile when sample_mask changes.</li>
+</ul>
+
+<p>Frederic Devernay (1):</p>
+<ul>
+  <li>glapi: fix _glapi_get_proc_address() for mangled function names</li>
+</ul>
+
+<p>Ilia Mirkin (2):</p>
+<ul>
+  <li>nvc0: fix retrieving query results into buffer for timestamps</li>
+  <li>nouveau/video: properly detect the decoder class for availability checks</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Properly report regs_written from SAMPLEINFO</li>
+</ul>
+
+<p>Jonathan Gray (1):</p>
+<ul>
+  <li>egl/x11: authenticate before doing chipset id ioctls</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+  <li>winsys/sw/xlib: use correct free function for xlib_dt-&gt;data</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix clear code for ignoring colormask for XRGB formats on Gen9+.</li>
+  <li>glsl: Convert lower_vec_index_to_swizzle to a rvalue visitor.</li>
+  <li>glsl: Lower vector_extracts to swizzles after lower_vector_derefs.</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/uvd: fix tonga feedback buffer size</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: fix blit-based GetTexImage for non-finalized textures</li>
+</ul>
+
+<p>Nicolai Hähnle (5):</p>
+<ul>
+  <li>gallium/radeon: handle failure when mapping staging buffer</li>
+  <li>st/glsl_to_tgsi: reduce stack explosion in recursive expression visitor</li>
+  <li>gallium/radeon: fix crash in r600_set_streamout_targets</li>
+  <li>radeonsi: correct NULL-pointer check in si_upload_const_buffer</li>
+  <li>radeonsi: work around an MSAA fast stencil clear problem</li>
+</ul>
+
+<p>Oded Gabbay (4):</p>
+<ul>
+  <li>r600g/radeonsi: send endian info to format translation functions</li>
+  <li>r600g: set endianess of 16/32-bit buffers according to do_endian_swap</li>
+  <li>r600g: use do_endian_swap in color swapping functions</li>
+  <li>r600g: use do_endian_swap in texture swapping function</li>
+</ul>
+
+<p>Patrick Rudolph (1):</p>
+<ul>
+  <li>r600g: fix and optimize tgsi_cmp when using ABS and NEG modifier</li>
+</ul>
+
+<p>Roland Scheidegger (3):</p>
+<ul>
+  <li>llvmpipe: (trivial) initialize src1_alpha var to NULL</li>
+  <li>gallivm: fix bogus argument order to lp_build_sample_mipmap function</li>
+  <li>gallivm: make sampling more robust against bogus coordinates</li>
+</ul>
+
+<p>Samuel Pitoiset (6):</p>
+<ul>
+  <li>gk110/ir: do not overwrite def value with zero for EXCH ops</li>
+  <li>gk110/ir: make use of IMUL32I for all immediates</li>
+  <li>nvc0/ir: fix wrong emission of (a OP b) OP c</li>
+  <li>gk110/ir: add emission for (a OP b) OP c</li>
+  <li>nvc0: reduce GL_MAX_3D_TEXTURE_SIZE to 2048 on Kepler+</li>
+  <li>st/glsl_to_tgsi: fix potential crash when allocating temporaries</li>
+</ul>
+
+<p>Stefan Dirsch (1):</p>
+<ul>
+  <li>dri3: Check for dummyContext to see if the glx_context is valid</li>
+</ul>
+
+<p>Topi Pohjolainen (2):</p>
+<ul>
+  <li>i965/blorp/gen7: Prepare re-using for gen8</li>
+  <li>i965/blorp: Use 8k chunk size for urb allocation</li>
+</ul>
+
+<p>WuZhen (3):</p>
+<ul>
+  <li>tgsi: initialize stack allocated struct</li>
+  <li>winsys/sw/dri: use correct free function for dri_sw_dt-&gt;data</li>
+  <li>android: enable dlopen() on all architectures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -4658,6 +4658,7 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
                                &prog->Comp.SharedSize);

      lower_vector_derefs(prog->_LinkedShaders[i]);
+      do_vec_index_to_swizzle(prog->_LinkedShaders[i]->ir);
   }

 done:
--- a/src/compiler/glsl/lower_vec_index_to_swizzle.cpp
+++ b/src/compiler/glsl/lower_vec_index_to_swizzle.cpp
@@ -30,18 +30,14 @@
 */

 #include "ir.h"
-#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
 #include "ir_optimization.h"
 #include "compiler/glsl_types.h"
 #include "main/macros.h"

-/**
- * Visitor class for replacing expressions with ir_constant values.
- */
-
 namespace {

-class ir_vec_index_to_swizzle_visitor : public ir_hierarchical_visitor {
+class ir_vec_index_to_swizzle_visitor : public ir_rvalue_visitor {
 public:
   ir_vec_index_to_swizzle_visitor()
   {
@@ -50,30 +46,28 @@ public:

   ir_rvalue *convert_vector_extract_to_swizzle(ir_rvalue *val);

-   virtual ir_visitor_status visit_enter(ir_expression *);
-   virtual ir_visitor_status visit_enter(ir_swizzle *);
-   virtual ir_visitor_status visit_enter(ir_assignment *);
-   virtual ir_visitor_status visit_enter(ir_return *);
-   virtual ir_visitor_status visit_enter(ir_call *);
-   virtual ir_visitor_status visit_enter(ir_if *);
+   virtual void handle_rvalue(ir_rvalue **);

   bool progress;
 };

 } /* anonymous namespace */

-ir_rvalue *
-ir_vec_index_to_swizzle_visitor::convert_vector_extract_to_swizzle(ir_rvalue *ir)
+void
+ir_vec_index_to_swizzle_visitor::handle_rvalue(ir_rvalue **rv)
 {
-   ir_expression *const expr = ir->as_expression();
+   if (*rv == NULL)
+      return;
+
+   ir_expression *const expr = (*rv)->as_expression();
   if (expr == NULL || expr->operation != ir_binop_vector_extract)
-      return ir;
+      return;

   ir_constant *const idx = expr->operands[1]->constant_expression_value();
   if (idx == NULL)
-      return ir;
+      return;

-   void *ctx = ralloc_parent(ir);
+   void *ctx = ralloc_parent(expr);
   this->progress = true;

   /* Page 40 of the GLSL 1.20 spec says:
@@ -93,71 +87,7 @@ ir_vec_index_to_swizzle_visitor::convert_vector_extract_to_swizzle(ir_rvalue *ir
   const int i = CLAMP(idx->value.i[0], 0,
                       (int) expr->operands[0]->type->vector_elements - 1);

-   return new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1);
-}
-
-ir_visitor_status
-ir_vec_index_to_swizzle_visitor::visit_enter(ir_expression *ir)
-{
-   unsigned int i;
-
-   for (i = 0; i < ir->get_num_operands(); i++) {
-      ir->operands[i] = convert_vector_extract_to_swizzle(ir->operands[i]);
-   }
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_vec_index_to_swizzle_visitor::visit_enter(ir_swizzle *ir)
-{
-   /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which
-    * the result of indexing a vector is.  But maybe at some point we'll end up
-    * using swizzling of scalars for vector construction.
-    */
-   ir->val = convert_vector_extract_to_swizzle(ir->val);
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_vec_index_to_swizzle_visitor::visit_enter(ir_assignment *ir)
-{
-   ir->rhs = convert_vector_extract_to_swizzle(ir->rhs);
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_vec_index_to_swizzle_visitor::visit_enter(ir_call *ir)
-{
-   foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) {
-      ir_rvalue *new_param = convert_vector_extract_to_swizzle(param);
-
-      if (new_param != param) {
-	 param->replace_with(new_param);
-      }
-   }
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_vec_index_to_swizzle_visitor::visit_enter(ir_return *ir)
-{
-   if (ir->value) {
-      ir->value = convert_vector_extract_to_swizzle(ir->value);
-   }
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_vec_index_to_swizzle_visitor::visit_enter(ir_if *ir)
-{
-   ir->condition = convert_vector_extract_to_swizzle(ir->condition);
-
-   return visit_continue;
+   *rv = new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1);
 }

 bool
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -542,6 +542,55 @@ dri2_x11_flush_front_buffer(__DRIdrawable * driDrawable, void *loaderPrivate)
 #endif
 }

+static int
+dri2_x11_do_authenticate(struct dri2_egl_display *dri2_dpy, uint32_t id)
+{
+   xcb_dri2_authenticate_reply_t *authenticate;
+   xcb_dri2_authenticate_cookie_t authenticate_cookie;
+   xcb_screen_iterator_t s;
+   xcb_screen_t *screen;
+   int ret = 0;
+
+   s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+
+   screen = get_xcb_screen(s, dri2_dpy->screen);
+   if (!screen) {
+      _eglLog(_EGL_WARNING, "DRI2: failed to get xcb screen");
+      return -1;
+   }
+
+   authenticate_cookie =
+      xcb_dri2_authenticate_unchecked(dri2_dpy->conn, screen->root, id);
+   authenticate =
+      xcb_dri2_authenticate_reply(dri2_dpy->conn, authenticate_cookie, NULL);
+
+   if (authenticate == NULL || !authenticate->authenticated)
+      ret = -1;
+
+   free(authenticate);
+
+   return ret;
+}
+
+static EGLBoolean
+dri2_x11_local_authenticate(struct dri2_egl_display *dri2_dpy)
+{
+#ifdef HAVE_LIBDRM
+   drm_magic_t magic;
+
+   if (drmGetMagic(dri2_dpy->fd, &magic)) {
+      _eglLog(_EGL_WARNING, "DRI2: failed to get drm magic");
+      return EGL_FALSE;
+   }
+
+   if (dri2_x11_do_authenticate(dri2_dpy, magic) < 0) {
+      _eglLog(_EGL_WARNING, "DRI2: failed to authenticate");
+      return EGL_FALSE;
+   }
+#endif
+   return EGL_TRUE;
+}
+
 static EGLBoolean
 dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
 {
@@ -630,6 +679,13 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
      return EGL_FALSE;
   }

+   if (!dri2_x11_local_authenticate(dri2_dpy)) {
+      close(dri2_dpy->fd);
+      free(dri2_dpy->device_name);
+      free(connect);
+      return EGL_FALSE;
+   }
+
   driver_name = xcb_dri2_connect_driver_name (connect);

   /* If Mesa knows about the appropriate driver for this fd, then trust it.
@@ -660,51 +716,8 @@ static int
 dri2_x11_authenticate(_EGLDisplay *disp, uint32_t id)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   xcb_dri2_authenticate_reply_t *authenticate;
-   xcb_dri2_authenticate_cookie_t authenticate_cookie;
-   xcb_screen_iterator_t s;
-   xcb_screen_t *screen;
-   int ret = 0;

-   s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
-
-   screen = get_xcb_screen(s, dri2_dpy->screen);
-   if (!screen) {
-      _eglLog(_EGL_WARNING, "DRI2: failed to get xcb screen");
-      return -1;
-   }
-
-   authenticate_cookie =
-      xcb_dri2_authenticate_unchecked(dri2_dpy->conn, screen->root, id);
-   authenticate =
-      xcb_dri2_authenticate_reply(dri2_dpy->conn, authenticate_cookie, NULL);
-
-   if (authenticate == NULL || !authenticate->authenticated)
-      ret = -1;
-
-   free(authenticate);
-   
-   return ret;
-}
-
-static EGLBoolean
-dri2_x11_local_authenticate(_EGLDisplay *disp)
-{
-#ifdef HAVE_LIBDRM
-   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   drm_magic_t magic;
-
-   if (drmGetMagic(dri2_dpy->fd, &magic)) {
-      _eglLog(_EGL_WARNING, "DRI2: failed to get drm magic");
-      return EGL_FALSE;
-   }
-   
-   if (dri2_x11_authenticate(disp, magic) < 0) {
-      _eglLog(_EGL_WARNING, "DRI2: failed to authenticate");
-      return EGL_FALSE;
-   }
-#endif
-   return EGL_TRUE;
+   return dri2_x11_do_authenticate(dri2_dpy, id);
 }

 static EGLBoolean
@@ -1390,9 +1403,6 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
   if (!dri2_x11_connect(dri2_dpy))
      goto cleanup_conn;

-   if (!dri2_x11_local_authenticate(disp))
-      goto cleanup_fd;
-
   if (!dri2_load_driver(disp))
      goto cleanup_fd;

--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -2130,8 +2130,8 @@ lp_build_fract(struct lp_build_context *bld,


 /**
- * Prevent returning a fractional part of 1.0 for very small negative values of
- * 'a' by clamping against 0.99999(9).
+ * Prevent returning 1.0 for very small negative values of 'a' by clamping
+ * against 0.99999(9). (Will also return that value for NaNs.)
 */
 static inline LLVMValueRef
 clamp_fract(struct lp_build_context *bld, LLVMValueRef fract)
@@ -2141,13 +2141,14 @@ clamp_fract(struct lp_build_context *bld, LLVMValueRef fract)
   /* this is the largest number smaller than 1.0 representable as float */
   max = lp_build_const_vec(bld->gallivm, bld->type,
                            1.0 - 1.0/(1LL << (lp_mantissa(bld->type) + 1)));
-   return lp_build_min(bld, fract, max);
+   return lp_build_min_ext(bld, fract, max,
+                           GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
 }


 /**
 * Same as lp_build_fract, but guarantees that the result is always smaller
- * than one.
+ * than one. Will also return the smaller-than-one value for infs, NaNs.
 */
 LLVMValueRef
 lp_build_fract_safe(struct lp_build_context *bld,
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -246,6 +246,12 @@ lp_build_coord_repeat_npot_linear_int(struct lp_build_sample_context *bld,
   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
+   /*
+    * We should never get values too large - except if coord was nan or inf,
+    * in which case things go terribly wrong...
+    * Alternatively, could use fract_safe above...
+    */
+   *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
 }


@@ -490,6 +496,10 @@ lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
+         /*
+          * It is important for this comparison to be unordered
+          * (otherwise fract_safe would be needed above).
+          */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
@@ -514,7 +524,8 @@ lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
-      coord = lp_build_min(coord_bld, coord, length_minus_one);
+      coord = lp_build_min_ext(coord_bld, coord, length_minus_one,
+                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -228,11 +228,16 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
   LLVMValueRef fract, flr, isOdd;

   lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
+   /* kill off NaNs */
+   /* XXX: not safe without arch rounding, fract can be anything. */
+   fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero,
+                            GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);

   /* isOdd = flr & 1 */
   isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");

   /* make coord positive or negative depending on isOdd */
+   /* XXX: slight overkill, masking out the sign bit is unnecessary */
   coord = lp_build_set_sign(coord_bld, fract, isOdd);

   /* convert isOdd to float */
@@ -272,10 +277,15 @@ lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
    * we avoided the 0.5/length division before the repeat wrap,
    * now need to fix up edge cases with selects
    */
+   /*
+    * Note we do a float (unordered) compare so we can eliminate NaNs.
+    * (Otherwise would need fract_safe above).
+    */
+   mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
+                           PIPE_FUNC_LESS, coord_f, coord_bld->zero);
+
   /* convert to int, compute lerp weight */
   lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
-   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
-                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
 }

@@ -375,7 +385,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
         }

         /* clamp to length max */
-         coord = lp_build_min(coord_bld, coord, length_f);
+         coord = lp_build_min_ext(coord_bld, coord, length_f,
+                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
@@ -398,7 +409,7 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
-      /* can skip clamp (though might not work for very large coord values */
+      /* can skip clamp (though might not work for very large coord values) */
      coord = lp_build_sub(coord_bld, coord, half);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
@@ -465,7 +476,8 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
         coord = lp_build_abs(coord_bld, coord);

         /* clamp to length max */
-         coord = lp_build_min(coord_bld, coord, length_f);
+         coord = lp_build_min_ext(coord_bld, coord, length_f,
+                                  GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
         /* clamp to [0, length - 0.5] */
@@ -628,9 +640,15 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
-
-      /* clamp to [0, length - 1] */
-      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
+      /*
+       * Use unsigned min due to possible undef values (NaNs, overflow)
+       */
+      {
+         struct lp_build_context abs_coord_bld = *int_coord_bld;
+         abs_coord_bld.type.sign = FALSE;
+         /* clamp to [0, length - 1] */
+         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
+      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
@@ -2256,8 +2274,8 @@ lp_build_sample_general(struct lp_build_sample_context *bld,
             * All pixels require just nearest filtering, which is way
             * cheaper than linear, hence do a separate path for that.
             */
-            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST, FALSE,
-                                   mip_filter_for_nearest,
+            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
+                                   mip_filter_for_nearest, FALSE,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -432,6 +432,7 @@ tgsi_dump_declaration(
   const struct tgsi_full_declaration *decl )
 {
   struct dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.dump_printf = dump_ctx_printf;

@@ -480,6 +481,7 @@ void tgsi_dump_property(
   const struct tgsi_full_property *prop )
 {
   struct dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.dump_printf = dump_ctx_printf;

@@ -511,6 +513,7 @@ tgsi_dump_immediate(
   const struct tgsi_full_immediate *imm )
 {
   struct dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.dump_printf = dump_ctx_printf;

@@ -671,6 +674,7 @@ tgsi_dump_instruction(
   uint instno )
 {
   struct dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.instno = instno;
   ctx.immno = instno;
@@ -696,6 +700,7 @@ void
 tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
 {
   struct dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.iter.prolog = prolog;
   ctx.iter.iterate_instruction = iter_instruction;
@@ -766,6 +771,7 @@ tgsi_dump_str(
   size_t size)
 {
   struct str_dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.base.iter.prolog = prolog;
   ctx.base.iter.iterate_instruction = iter_instruction;
@@ -805,6 +811,7 @@ tgsi_dump_instruction_str(
   size_t size)
 {
   struct str_dump_ctx ctx;
+   memset(&ctx, 0, sizeof(ctx));

   ctx.base.instno = instno;
   ctx.base.immno = instno;
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1521,7 +1521,7 @@ void util_blitter_blit_generic(struct blitter_context *blitter,
 {
   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
   struct pipe_context *pipe = ctx->base.pipe;
-   struct pipe_framebuffer_state fb_state;
+   struct pipe_framebuffer_state fb_state = {0};
   enum pipe_texture_target src_target = src->texture->target;
   unsigned src_samples = src->texture->nr_samples;
   unsigned dst_samples = dst->texture->nr_samples;
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1601,7 +1601,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
   LLVMValueRef fs_src[4][TGSI_NUM_CHANNELS];
   LLVMValueRef fs_src1[4][TGSI_NUM_CHANNELS];
   LLVMValueRef src_alpha[4 * 4];
-   LLVMValueRef src1_alpha[4 * 4];
+   LLVMValueRef src1_alpha[4 * 4] = { NULL };
   LLVMValueRef src_mask[4 * 4];
   LLVMValueRef src[4 * 4];
   LLVMValueRef src1[4 * 4];
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -579,7 +579,7 @@ CodeEmitterGK110::emitIMUL(const Instruction *i)
   assert(!i->src(0).mod.neg() && !i->src(1).mod.neg());
   assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());

-   if (isLIMM(i->src(1), TYPE_S32)) {
+   if (i->src(1).getFile() == FILE_IMMEDIATE) {
      emitForm_L(i, 0x280, 2, Modifier(0));

      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
@@ -750,6 +750,32 @@ CodeEmitterGK110::emitNOT(const Instruction *i)
 void
 CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp)
 {
+   if (i->def(0).getFile() == FILE_PREDICATE) {
+      code[0] = 0x00000002 | (subOp << 27);
+      code[1] = 0x84800000;
+
+      emitPredicate(i);
+
+      defId(i->def(0), 5);
+      srcId(i->src(0), 14);
+      if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 17;
+      srcId(i->src(1), 32);
+      if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 3;
+
+      if (i->defExists(1)) {
+         defId(i->def(1), 2);
+      } else {
+         code[0] |= 7 << 2;
+      }
+      // (a OP b) OP c
+      if (i->predSrc != 2 && i->srcExists(2)) {
+         code[1] |= subOp << 16;
+         srcId(i->src(2), 42);
+         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 13;
+      } else {
+         code[1] |= 7 << 10;
+      }
+   } else
   if (isLIMM(i->src(1), TYPE_S32)) {
      emitForm_L(i, 0x200, 0, i->src(1).mod);
      code[1] |= subOp << 24;
@@ -1738,6 +1764,9 @@ uses64bitAddress(const Instruction *ldst)
 void
 CodeEmitterGK110::emitATOM(const Instruction *i)
 {
+   const bool hasDst = i->defExists(0);
+   const bool exch = i->subOp == NV50_IR_SUBOP_ATOM_EXCH;
+
   code[0] = 0x00000002;
   if (i->subOp == NV50_IR_SUBOP_ATOM_CAS)
      code[1] = 0x77800000;
@@ -1766,15 +1795,21 @@ CodeEmitterGK110::emitATOM(const Instruction *i)
   /* TODO: cas: flip bits if $r255 is used */
   srcId(i->src(1), 23);

-   if (i->defExists(0))
+   if (hasDst) {
      defId(i->def(0), 2);
-   else
+   } else
+   if (!exch) {
      code[0] |= 255 << 2;
+   }

-   const int32_t offset = SDATA(i->src(0)).offset;
-   assert(offset < 0x80000 && offset >= -0x80000);
-   code[0] |= (offset & 1) << 31;
-   code[1] |= (offset & 0xffffe) >> 1;
+   if (hasDst || !exch) {
+      const int32_t offset = SDATA(i->src(0)).offset;
+      assert(offset < 0x80000 && offset >= -0x80000);
+      code[0] |= (offset & 1) << 31;
+      code[1] |= (offset & 0xffffe) >> 1;
+   } else {
+      srcAddr32(i->src(0), 31);
+   }

   if (i->getIndirect(0, 0)) {
      srcId(i->getIndirect(0, 0), 10);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -813,8 +813,8 @@ CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp)
      // (a OP b) OP c
      if (i->predSrc != 2 && i->srcExists(2)) {
         code[1] |= subOp << 21;
-         srcId(i->src(2), 17);
-         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20;
+         srcId(i->src(2), 49);
+         if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[1] |= 1 << 20;
      } else {
         code[1] |= 0x000e0000;
      }
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
@@ -25,6 +25,8 @@
 #include <stdio.h>
 #include <fcntl.h>

+#include <nvif/class.h>
+
 #include "nouveau_screen.h"
 #include "nouveau_context.h"
 #include "nouveau_vp3_video.h"
@@ -351,6 +353,16 @@ nouveau_vp3_load_firmware(struct nouveau_vp3_decoder *dec,
   return 0;
 }

+static const struct nouveau_mclass
+nouveau_decoder_msvld[] = {
+   { G98_MSVLD, -1 },
+   { IGT21A_MSVLD, -1 },
+   { GT212_MSVLD, -1 },
+   { GF100_MSVLD, -1 },
+   { GK104_MSVLD, -1 },
+   {}
+};
+
 static int
 firmware_present(struct pipe_screen *pscreen, enum pipe_video_profile profile)
 {
@@ -368,13 +380,7 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_profile profile)
      struct nvc0_fifo nvc0_args = {};
      struct nve0_fifo nve0_args = {.engine = NVE0_FIFO_ENGINE_BSP};
      void *data = NULL;
-      int size, oclass;
-      if (chipset < 0xc0)
-         oclass = 0x85b1;
-      else if (chipset < 0xe0)
-         oclass = 0x90b1;
-      else
-         oclass = 0x95b1;
+      int size;

      if (chipset < 0xc0) {
         data = &nv04_data;
@@ -393,7 +399,10 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_profile profile)
                         data, size, &channel);

      if (channel) {
-         nouveau_object_new(channel, 0, oclass, NULL, 0, &bsp);
+         ret = nouveau_object_mclass(channel, nouveau_decoder_msvld);
+         if (ret >= 0)
+            nouveau_object_new(channel, 0, nouveau_decoder_msvld[ret].oclass,
+                               NULL, 0, &bsp);
         if (bsp)
            screen->firmware_info.profiles_present |= 1;
         nouveau_object_del(&bsp);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -370,7 +370,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_hw_query *hq = nvc0_hw_query(q);
   struct nv04_resource *buf = nv04_resource(resource);
-   unsigned stride;
+   unsigned qoffset = 0, stride;

   assert(!hq->funcs || !hq->funcs->get_query_result);

@@ -426,17 +426,27 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
   case PIPE_QUERY_PIPELINE_STATISTICS:
      stride = 12;
      break;
+   case PIPE_QUERY_TIME_ELAPSED:
+   case PIPE_QUERY_TIMESTAMP:
+      qoffset = 8;
+      /* fallthrough */
   default:
      assert(index == 0);
      stride = 1;
      break;
   }

-   if (hq->is64bit) {
-      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * index,
-                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
-      nouveau_pushbuf_data(push, hq->bo, hq->offset + 16 * (index + stride),
+   if (hq->is64bit || qoffset) {
+      nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset + 16 * index,
                           8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      if (q->type == PIPE_QUERY_TIMESTAMP) {
+         PUSH_DATA(push, 0);
+         PUSH_DATA(push, 0);
+      } else {
+         nouveau_pushbuf_data(push, hq->bo, hq->offset + qoffset +
+                              16 * (index + stride),
+                              8 | NVC0_IB_ENTRY_1_NO_PREFETCH);
+      }
   } else {
      nouveau_pushbuf_data(push, hq->bo, hq->offset + 4,
                           4 | NVC0_IB_ENTRY_1_NO_PREFETCH);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -78,7 +78,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
      return 15;
   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-      return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
+      return 12;
   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
      return 2048;
   case PIPE_CAP_MIN_TEXEL_OFFSET:
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -213,13 +213,14 @@ static uint32_t r600_translate_dbformat(enum pipe_format format)

 static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
 {
-	return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0U;
+	return r600_translate_texformat(screen, format, NULL, NULL, NULL,
+                                   FALSE) != ~0U;
 }

 static bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format)
 {
-	return r600_translate_colorformat(chip, format) != ~0U &&
-		r600_translate_colorswap(format) != ~0U;
+	return r600_translate_colorformat(chip, format, FALSE) != ~0U &&
+		r600_translate_colorswap(format, FALSE) != ~0U;
 }

 static bool r600_is_zs_format_supported(enum pipe_format format)
@@ -668,6 +669,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	unsigned base_level, first_level, last_level;
 	unsigned dim, last_layer;
 	uint64_t va;
+	bool do_endian_swap = FALSE;

 	if (!view)
 		return NULL;
@@ -713,16 +715,19 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 		}
 	}

+	if (R600_BIG_ENDIAN)
+		do_endian_swap = !(tmp->is_depth && !tmp->is_flushing_texture);
+
 	format = r600_translate_texformat(ctx->screen, pipe_format,
 					  swizzle,
-					  &word4, &yuv_format);
+					  &word4, &yuv_format, do_endian_swap);
 	assert(format != ~0);
 	if (format == ~0) {
 		FREE(view);
 		return NULL;
 	}

-	endian = r600_colorformat_endian_swap(format);
+	endian = r600_colorformat_endian_swap(format, do_endian_swap);

 	base_level = 0;
 	first_level = state->u.tex.first_level;
@@ -980,9 +985,9 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
 {
 	struct pipe_resource *pipe_buffer = surf->base.texture;
 	unsigned format = r600_translate_colorformat(rctx->b.chip_class,
-						     surf->base.format);
-	unsigned endian = r600_colorformat_endian_swap(format);
-	unsigned swap = r600_translate_colorswap(surf->base.format);
+						     surf->base.format, FALSE);
+	unsigned endian = r600_colorformat_endian_swap(format, FALSE);
+	unsigned swap = r600_translate_colorswap(surf->base.format, FALSE);
 	unsigned block_size =
 		align(util_format_get_blocksize(pipe_buffer->format), 4);
 	unsigned pitch_alignment =
@@ -1035,7 +1040,7 @@ void evergreen_init_color_surface(struct r600_context *rctx,
 	unsigned non_disp_tiling, macro_aspect, tile_split, bankh, bankw, fmask_bankh, nbanks;
 	const struct util_format_description *desc;
 	int i;
-	bool blend_clamp = 0, blend_bypass = 0;
+	bool blend_clamp = 0, blend_bypass = 0, do_endian_swap = FALSE;

 	offset = rtex->surface.level[level].offset;
 	if (rtex->surface.level[level].mode == RADEON_SURF_MODE_LINEAR) {
@@ -1133,13 +1138,17 @@ void evergreen_init_color_surface(struct r600_context *rctx,
 			ntype = V_028C70_NUMBER_UINT;
 	}

-	format = r600_translate_colorformat(rctx->b.chip_class, surf->base.format);
+	if (R600_BIG_ENDIAN)
+		do_endian_swap = !(rtex->is_depth && !rtex->is_flushing_texture);
+
+	format = r600_translate_colorformat(rctx->b.chip_class, surf->base.format,
+			                              do_endian_swap);
 	assert(format != ~0);

-	swap = r600_translate_colorswap(surf->base.format);
+	swap = r600_translate_colorswap(surf->base.format, do_endian_swap);
 	assert(swap != ~0);

-	endian = r600_colorformat_endian_swap(format);
+	endian = r600_colorformat_endian_swap(format, do_endian_swap);

 	/* blend clamp should be set for all NORM/SRGB types */
 	if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -769,9 +769,11 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
 				   boolean vtx);
 uint32_t r600_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
 				  const unsigned char *swizzle_view,
-				  uint32_t *word4_p, uint32_t *yuv_format_p);
-uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format);
-uint32_t r600_colorformat_endian_swap(uint32_t colorformat);
+				  uint32_t *word4_p, uint32_t *yuv_format_p,
+				  bool do_endian_swap);
+uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format,
+				  bool do_endian_swap);
+uint32_t r600_colorformat_endian_swap(uint32_t colorformat, bool do_endian_swap);

 /* r600_uvd.c */
 struct pipe_video_codec *r600_uvd_create_decoder(struct pipe_context *context,
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7787,6 +7787,15 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 	int i, r, j;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 	int temp_regs[3];
+	unsigned op;
+
+	if (ctx->src[0].abs && ctx->src[0].neg) {
+		op = ALU_OP3_CNDE;
+		ctx->src[0].abs = 0;
+		ctx->src[0].neg = 0;
+	} else {
+		op = ALU_OP3_CNDGE;
+	}

 	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
 		temp_regs[j] = 0;
@@ -7799,7 +7808,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 			continue;

 		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-		alu.op = ALU_OP3_CNDGE;
+		alu.op = op;
 		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
 		if (r)
 			return r;
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -143,13 +143,14 @@ static uint32_t r600_translate_dbformat(enum pipe_format format)

 static bool r600_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format)
 {
-	return r600_translate_texformat(screen, format, NULL, NULL, NULL) != ~0U;
+	return r600_translate_texformat(screen, format, NULL, NULL, NULL,
+                                   FALSE) != ~0U;
 }

 static bool r600_is_colorbuffer_format_supported(enum chip_class chip, enum pipe_format format)
 {
-	return r600_translate_colorformat(chip, format) != ~0U &&
-	       r600_translate_colorswap(format) != ~0U;
+	return r600_translate_colorformat(chip, format, FALSE) != ~0U &&
+	       r600_translate_colorswap(format, FALSE) != ~0U;
 }

 static bool r600_is_zs_format_supported(enum pipe_format format)
@@ -641,6 +642,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
 	uint32_t word4 = 0, yuv_format = 0, pitch = 0;
 	unsigned char swizzle[4], array_mode = 0;
 	unsigned width, height, depth, offset_level, last_level;
+	bool do_endian_swap = FALSE;

 	if (!view)
 		return NULL;
@@ -661,9 +663,12 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
 	swizzle[2] = state->swizzle_b;
 	swizzle[3] = state->swizzle_a;

+	if (R600_BIG_ENDIAN)
+		do_endian_swap = !(tmp->is_depth && !tmp->is_flushing_texture);
+
 	format = r600_translate_texformat(ctx->screen, state->format,
 					  swizzle,
-					  &word4, &yuv_format);
+					  &word4, &yuv_format, do_endian_swap);
 	assert(format != ~0);
 	if (format == ~0) {
 		FREE(view);
@@ -678,7 +683,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
 		tmp = tmp->flushed_depth_texture;
 	}

-	endian = r600_colorformat_endian_swap(format);
+	endian = r600_colorformat_endian_swap(format, do_endian_swap);

 	offset_level = state->u.tex.first_level;
 	last_level = state->u.tex.last_level - offset_level;
@@ -861,7 +866,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
 	unsigned offset;
 	const struct util_format_description *desc;
 	int i;
-	bool blend_bypass = 0, blend_clamp = 1;
+	bool blend_bypass = 0, blend_clamp = 1, do_endian_swap = FALSE;

 	if (rtex->is_depth && !rtex->is_flushing_texture && !r600_can_read_depth(rtex)) {
 		r600_init_flushed_depth_texture(&rctx->b.b, surf->base.texture, NULL);
@@ -924,13 +929,17 @@ static void r600_init_color_surface(struct r600_context *rctx,
 			ntype = V_0280A0_NUMBER_UINT;
 	}

-	format = r600_translate_colorformat(rctx->b.chip_class, surf->base.format);
+	if (R600_BIG_ENDIAN)
+		do_endian_swap = !(rtex->is_depth && !rtex->is_flushing_texture);
+
+	format = r600_translate_colorformat(rctx->b.chip_class, surf->base.format,
+			                              do_endian_swap);
 	assert(format != ~0);

-	swap = r600_translate_colorswap(surf->base.format);
+	swap = r600_translate_colorswap(surf->base.format, do_endian_swap);
 	assert(swap != ~0);

-	endian = r600_colorformat_endian_swap(format);
+	endian = r600_colorformat_endian_swap(format, do_endian_swap);

 	/* set blend bypass according to docs if SINT/UINT or
 	   8/24 COLOR variants */
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -2229,7 +2229,8 @@ unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
 uint32_t r600_translate_texformat(struct pipe_screen *screen,
 				  enum pipe_format format,
 				  const unsigned char *swizzle_view,
-				  uint32_t *word4_p, uint32_t *yuv_format_p)
+				  uint32_t *word4_p, uint32_t *yuv_format_p,
+				  bool do_endian_swap)
 {
 	struct r600_screen *rscreen = (struct r600_screen *)screen;
 	uint32_t result = 0, word4 = 0, yuv_format = 0;
@@ -2239,6 +2240,9 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
 	bool is_srgb_valid = FALSE;
 	const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
 	const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
+	const unsigned char swizzle_xxxy[4] = {0, 0, 0, 1};
+	const unsigned char swizzle_zyx1[4] = {2, 1, 0, 5};
+	const unsigned char swizzle_zyxw[4] = {2, 1, 0, 3};

 	int i;
 	const uint32_t sign_bit[4] = {
@@ -2247,11 +2251,41 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
 		S_038010_FORMAT_COMP_Z(V_038010_SQ_FORMAT_COMP_SIGNED),
 		S_038010_FORMAT_COMP_W(V_038010_SQ_FORMAT_COMP_SIGNED)
 	};
+
+	/* Need to replace the specified texture formats in case of big-endian.
+	 * These are the formats whose channels have a number of bits
+	 * that is not divisible by 8.
+	 * Mesa conversion functions don't swap bits for those formats, and because
+	 * we transmit this over a serial bus to the GPU (PCIe), the
+	 * bit-endianness is important!
+	 * In case we have an "opposite" format, just use that for the swizzling
+	 * information. If we don't have such an "opposite" format, we need
+	 * to use a fixed swizzle info instead (see below)
+	 */
+	if (format == PIPE_FORMAT_R4A4_UNORM && do_endian_swap)
+		format = PIPE_FORMAT_A4R4_UNORM;
+
 	desc = util_format_description(format);

 	/* Depth and stencil swizzling is handled separately. */
 	if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) {
-		word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
+		/* Need to check for specific texture formats that don't have
+		 * an "opposite" format we can use. For those formats, we directly
+		 * specify the swizzling, which is the LE swizzling as defined in
+		 * u_format.csv
+		 */
+		if (do_endian_swap) {
+			if (format == PIPE_FORMAT_L4A4_UNORM)
+				word4 |= r600_get_swizzle_combined(swizzle_xxxy, swizzle_view, FALSE);
+			else if (format == PIPE_FORMAT_B4G4R4A4_UNORM)
+				word4 |= r600_get_swizzle_combined(swizzle_zyxw, swizzle_view, FALSE);
+			else if (format == PIPE_FORMAT_B4G4R4X4_UNORM || format == PIPE_FORMAT_B5G6R5_UNORM)
+				word4 |= r600_get_swizzle_combined(swizzle_zyx1, swizzle_view, FALSE);
+			else
+				word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
+		} else {
+			word4 |= r600_get_swizzle_combined(desc->swizzle, swizzle_view, FALSE);
+		}
 	}

 	/* Colorspace (return non-RGB formats directly). */
@@ -2602,7 +2636,8 @@ out_unknown:
 	return ~0;
 }

-uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format)
+uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format format,
+						bool do_endian_swap)
 {
 	const struct util_format_description *desc = util_format_description(format);
 	int channel = util_format_get_first_non_void_channel(format);
@@ -2660,7 +2695,7 @@ uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format forma
 					return V_0280A0_COLOR_32_32;
 			}
 		} else if (HAS_SIZE(8,24,0,0)) {
-			return V_0280A0_COLOR_24_8;
+			return (do_endian_swap ? V_0280A0_COLOR_8_24 : V_0280A0_COLOR_24_8);
 		} else if (HAS_SIZE(24,8,0,0)) {
 			return V_0280A0_COLOR_8_24;
 		}
@@ -2702,7 +2737,7 @@ uint32_t r600_translate_colorformat(enum chip_class chip, enum pipe_format forma
 	return ~0U;
 }

-uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
+uint32_t r600_colorformat_endian_swap(uint32_t colorformat, bool do_endian_swap)
 {
 	if (R600_BIG_ENDIAN) {
 		switch(colorformat) {
@@ -2712,17 +2747,24 @@ uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
 			return ENDIAN_NONE;

 		/* 16-bit buffers. */
+		case V_0280A0_COLOR_8_8:
+			/*
+			 * No need to do endian swaps on array formats,
+			 * as the mesa<-->pipe format conversions take the
+			 * endianness into account
+			 */
+			return ENDIAN_NONE;
+
 		case V_0280A0_COLOR_5_6_5:
 		case V_0280A0_COLOR_1_5_5_5:
 		case V_0280A0_COLOR_4_4_4_4:
 		case V_0280A0_COLOR_16:
-		case V_0280A0_COLOR_8_8:
-			return ENDIAN_8IN16;
+			return (do_endian_swap ? ENDIAN_8IN16 : ENDIAN_NONE);

 		/* 32-bit buffers. */
 		case V_0280A0_COLOR_8_8_8_8:
 			/*
-			 * No need to do endian swaps on four 8-bits components,
+			 * No need to do endian swaps on array formats,
 			 * as mesa<-->pipe formats conversion take into account
 			 * the endianess
 			 */
@@ -2732,9 +2774,11 @@ uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
 		case V_0280A0_COLOR_8_24:
 		case V_0280A0_COLOR_24_8:
 		case V_0280A0_COLOR_32_FLOAT:
+			return (do_endian_swap ? ENDIAN_8IN32 : ENDIAN_NONE);
+
 		case V_0280A0_COLOR_16_16_FLOAT:
 		case V_0280A0_COLOR_16_16:
-			return ENDIAN_8IN32;
+			return ENDIAN_8IN16;

 		/* 64-bit buffers. */
 		case V_0280A0_COLOR_16_16_16_16:
@@ -2781,7 +2825,8 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc
 	}
 	/* Streamout buffers. */
 	for (i = 0; i < rctx->b.streamout.num_targets; i++) {
-		if (rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
+		if (rctx->b.streamout.targets[i] &&
+		    rctx->b.streamout.targets[i]->b.buffer == &rbuffer->b.b) {
 			if (rctx->b.streamout.begin_emitted) {
 				r600_emit_streamout_end(&rctx->b);
 			}
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -358,6 +358,10 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
 				       0, 0, resource, level, box);

 			data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
+			if (!data) {
+				pipe_resource_reference((struct pipe_resource **)&staging, NULL);
+				return NULL;
+			}
 			data += box->x % R600_MAP_BUFFER_ALIGNMENT;

 			return r600_buffer_get_transfer(ctx, resource, level, usage, box,
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -576,7 +576,7 @@ struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
 						struct pipe_resource *texture,
 						const struct pipe_surface *templ,
 						unsigned width, unsigned height);
-unsigned r600_translate_colorswap(enum pipe_format format);
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap);
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 				   struct pipe_framebuffer_state *fb,
 				   struct r600_atom *fb_state,
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -116,7 +116,7 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
 {
 	struct r600_common_context *rctx = (struct r600_common_context *)ctx;
 	unsigned i;
-        unsigned append_bitmask = 0;
+        unsigned enabled_mask = 0, append_bitmask = 0;

 	/* Stop streamout. */
 	if (rctx->streamout.num_targets && rctx->streamout.begin_emitted) {
@@ -126,18 +126,19 @@ void r600_set_streamout_targets(struct pipe_context *ctx,
 	/* Set the new targets. */
 	for (i = 0; i < num_targets; i++) {
 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], targets[i]);
+		if (!targets[i])
+			continue;
+
 		r600_context_add_resource_size(ctx, targets[i]->buffer);
+		enabled_mask |= 1 << i;
 		if (offsets[i] == ((unsigned)-1))
-			append_bitmask |=  1 << i;
+			append_bitmask |= 1 << i;
 	}
 	for (; i < rctx->streamout.num_targets; i++) {
 		pipe_so_target_reference((struct pipe_stream_output_target**)&rctx->streamout.targets[i], NULL);
 	}

-	rctx->streamout.enabled_mask = (num_targets >= 1 && targets[0] ? 1 : 0) |
-				       (num_targets >= 2 && targets[1] ? 2 : 0) |
-				       (num_targets >= 3 && targets[2] ? 4 : 0) |
-				       (num_targets >= 4 && targets[3] ? 8 : 0);
+	rctx->streamout.enabled_mask = enabled_mask;

 	rctx->streamout.num_targets = num_targets;
 	rctx->streamout.append_bitmask = append_bitmask;
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1258,7 +1258,7 @@ static void r600_surface_destroy(struct pipe_context *pipe,
 	FREE(surface);
 }

-unsigned r600_translate_colorswap(enum pipe_format format)
+unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
 {
 	const struct util_format_description *desc = util_format_description(format);

@@ -1285,7 +1285,8 @@ unsigned r600_translate_colorswap(enum pipe_format format)
 		else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
 			 (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
 		         (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
-			return V_0280A0_SWAP_STD_REV; /* YX__ */
+			/* YX__ */
+			return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
 		else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
 			return V_0280A0_SWAP_ALT; /* X__Y */
 		else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
@@ -1293,20 +1294,25 @@ unsigned r600_translate_colorswap(enum pipe_format format)
 		break;
 	case 3:
 		if (HAS_SWIZZLE(0,X))
-			return V_0280A0_SWAP_STD; /* XYZ */
+			return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
 		else if (HAS_SWIZZLE(0,Z))
 			return V_0280A0_SWAP_STD_REV; /* ZYX */
 		break;
 	case 4:
 		/* check the middle channels, the 1st and 4th channel can be NONE */
-		if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
+		if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
 			return V_0280A0_SWAP_STD; /* XYZW */
-		else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
+		} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
 			return V_0280A0_SWAP_STD_REV; /* WZYX */
-		else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X))
+		} else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
 			return V_0280A0_SWAP_ALT; /* ZYXW */
-		else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W))
-			return V_0280A0_SWAP_ALT_REV; /* YZWX */
+		} else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
+			/* YZWX */
+			if (desc->is_array)
+				return V_0280A0_SWAP_ALT_REV;
+			else
+				return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
+		}
 		break;
 	}
 	return ~0U;
@@ -1357,7 +1363,7 @@ static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
 	    surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
 		extra_channel = -1;
 	} else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
-		if(r600_translate_colorswap(surface_format) <= 1)
+		if(r600_translate_colorswap(surface_format, FALSE) <= 1)
 			extra_channel = desc->nr_channels - 1;
 		else
 			extra_channel = 0;
--- a/src/gallium/drivers/radeon/radeon_uvd.c
+++ b/src/gallium/drivers/radeon/radeon_uvd.c
@@ -57,6 +57,7 @@

 #define FB_BUFFER_OFFSET 0x1000
 #define FB_BUFFER_SIZE 2048
+#define FB_BUFFER_SIZE_TONGA (2048 * 64)
 #define IT_SCALING_TABLE_SIZE 992

 /* UVD decoder representation */
@@ -78,6 +79,7 @@ struct ruvd_decoder {
 	struct rvid_buffer		msg_fb_it_buffers[NUM_BUFFERS];
 	struct ruvd_msg			*msg;
 	uint32_t			*fb;
+	unsigned			fb_size;
 	uint8_t				*it;

 	struct rvid_buffer		bs_buffers[NUM_BUFFERS];
@@ -148,7 +150,7 @@ static void map_msg_fb_it_buf(struct ruvd_decoder *dec)
 	dec->msg = (struct ruvd_msg *)ptr;
 	dec->fb = (uint32_t *)(ptr + FB_BUFFER_OFFSET);
 	if (have_it(dec))
-		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + FB_BUFFER_SIZE);
+		dec->it = (uint8_t *)(ptr + FB_BUFFER_OFFSET + dec->fb_size);
 }

 /* unmap and send a message command to the VCPU */
@@ -958,7 +960,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,

 	dec->msg->body.decode.dpb_size = dec->dpb.res->buf->size;
 	dec->msg->body.decode.bsd_size = bs_size;
-	dec->msg->body.decode.db_pitch = dec->base.width;
+	dec->msg->body.decode.db_pitch = align(dec->base.width, 16);

 	dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
 	if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
@@ -994,7 +996,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
 	dec->msg->body.decode.extension_support = 0x1;

 	/* set at least the feedback buffer size */
-	dec->fb[0] = FB_BUFFER_SIZE;
+	dec->fb[0] = dec->fb_size;

 	send_msg_buf(dec);

@@ -1012,7 +1014,7 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
 		 FB_BUFFER_OFFSET, RADEON_USAGE_WRITE, RADEON_DOMAIN_GTT);
 	if (have_it(dec))
 		send_cmd(dec, RUVD_CMD_ITSCALING_TABLE_BUFFER, msg_fb_it_buf->res->buf,
-			 FB_BUFFER_OFFSET + FB_BUFFER_SIZE, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
+			 FB_BUFFER_OFFSET + dec->fb_size, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
 	set_reg(dec, RUVD_ENGINE_CNTL, 1);

 	flush(dec);
@@ -1092,9 +1094,11 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context,
 		goto error;
 	}

+	dec->fb_size = (info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA :
+			FB_BUFFER_SIZE;
 	bs_buf_size = width * height * 512 / (16 * 16);
 	for (i = 0; i < NUM_BUFFERS; ++i) {
-		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE;
+		unsigned msg_fb_it_size = FB_BUFFER_OFFSET + dec->fb_size;
 		STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET);
 		if (have_it(dec))
 			msg_fb_it_size += IT_SCALING_TABLE_SIZE;
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -482,7 +482,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf

 	u_upload_alloc(sctx->b.uploader, 0, size, 256, const_offset,
 		       (struct pipe_resource**)rbuffer, &tmp);
-	if (rbuffer)
+	if (*rbuffer)
 		util_memcpy_cpu_to_le32(tmp, ptr, size);
 }

--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1966,7 +1966,7 @@ static bool si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_
 static bool si_is_colorbuffer_format_supported(enum pipe_format format)
 {
 	return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
-		r600_translate_colorswap(format) != ~0U;
+		r600_translate_colorswap(format, FALSE) != ~0U;
 }

 static bool si_is_zs_format_supported(enum pipe_format format)
@@ -2249,7 +2249,7 @@ static void si_initialize_color_surface(struct si_context *sctx,
 		R600_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
 	}
 	assert(format != V_028C70_COLOR_INVALID);
-	swap = r600_translate_colorswap(surf->base.format);
+	swap = r600_translate_colorswap(surf->base.format, FALSE);
 	endian = si_colorformat_endian_swap(format);

 	/* blend clamp should be set for all NORM/SRGB types */
@@ -2461,9 +2461,21 @@ static void si_init_depth_surface(struct si_context *sctx,
 		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
 			  S_028040_ALLOW_EXPCLEAR(1);

-		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
-			s_info |= S_028044_ALLOW_EXPCLEAR(1);
-		else
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+			/* Workaround: For a not yet understood reason, the
+			 * combination of MSAA, fast stencil clear and stencil
+			 * decompress messes with subsequent stencil buffer
+			 * uses. Problem was reproduced on Verde, Bonaire,
+			 * Tonga, and Carrizo.
+			 *
+			 * Disabling EXPCLEAR works around the problem.
+			 *
+			 * Check piglit's arb_texture_multisample-stencil-clear
+			 * test if you want to try changing this.
+			 */
+			if (rtex->resource.b.b.nr_samples <= 1)
+				s_info |= S_028044_ALLOW_EXPCLEAR(1);
+		} else
 			/* Use all of the htile_buffer for depth if there's no stencil. */
 			s_info |= S_028044_TILE_STENCIL_DISABLE(1);

@@ -3071,7 +3083,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,

 	if (tmp->dcc_buffer) {
 		uint64_t dcc_offset = surflevel[base_level].dcc_offset;
-		unsigned swap = r600_translate_colorswap(pipe_format);
+		unsigned swap = r600_translate_colorswap(pipe_format, FALSE);

 		view->state[6] = S_008F28_COMPRESSION_EN(1) | S_008F28_ALPHA_IS_ON_MSB(swap <= 1);
 		view->state[7] = (tmp->dcc_buffer->gpu_address + dcc_offset) >> 8;
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2137,6 +2137,7 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
                            VC4_DIRTY_FRAMEBUFFER |
                            VC4_DIRTY_ZSA |
                            VC4_DIRTY_RASTERIZER |
+                            VC4_DIRTY_SAMPLE_MASK |
                            VC4_DIRTY_FRAGTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_UNCOMPILED_FS))) {
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -588,7 +588,8 @@ vc4_create_surface(struct pipe_context *pctx,
        psurf->u.tex.level = level;
        psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
        psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
-        surface->offset = rsc->slices[level].offset;
+        surface->offset = (rsc->slices[level].offset +
+                           psurf->u.tex.first_layer * rsc->cube_map_stride);
        surface->tiling = rsc->slices[level].tiling;

        return &surface->base;
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -380,6 +380,9 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
 {
        unsigned retval = 0;

+        if (sample_count > 1 && sample_count != VC4_MAX_SAMPLES)
+                return FALSE;
+
        if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
            !util_format_is_supported(format, usage)) {
                return FALSE;
@@ -439,14 +442,12 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
        }

        if ((usage & PIPE_BIND_RENDER_TARGET) &&
-            (sample_count == 0 || sample_count == VC4_MAX_SAMPLES) &&
            vc4_rt_format_supported(format)) {
                retval |= PIPE_BIND_RENDER_TARGET;
        }

        if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
-            (sample_count == 0 || sample_count == VC4_MAX_SAMPLES) &&
-            (vc4_tex_format_supported(format))) {
+            vc4_tex_format_supported(format)) {
                retval |= PIPE_BIND_SAMPLER_VIEW;
        }

--- a/src/gallium/drivers/vc4/vc4_tiling.c
+++ b/src/gallium/drivers/vc4/vc4_tiling.c
@@ -140,8 +140,8 @@ vc4_load_lt_image(void *dst, uint32_t dst_stride,
 {
        uint32_t utile_w = vc4_utile_width(cpp);
        uint32_t utile_h = vc4_utile_height(cpp);
-        uint32_t xstart = box->x / utile_w;
-        uint32_t ystart = box->y / utile_h;
+        uint32_t xstart = box->x;
+        uint32_t ystart = box->y;

        for (uint32_t y = 0; y < box->height; y += utile_h) {
                for (int x = 0; x < box->width; x += utile_w) {
@@ -161,8 +161,8 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride,
 {
        uint32_t utile_w = vc4_utile_width(cpp);
        uint32_t utile_h = vc4_utile_height(cpp);
-        uint32_t xstart = box->x / utile_w;
-        uint32_t ystart = box->y / utile_h;
+        uint32_t xstart = box->x;
+        uint32_t ystart = box->y;

        for (uint32_t y = 0; y < box->height; y += utile_h) {
                for (int x = 0; x < box->width; x += utile_w) {
--- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
@@ -125,7 +125,7 @@ dri_sw_displaytarget_destroy(struct sw_winsys *ws,
 {
   struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt);

-   FREE(dri_sw_dt->data);
+   align_free(dri_sw_dt->data);

   FREE(dri_sw_dt);
 }
--- a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
+++ b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
@@ -268,7 +268,7 @@ xlib_displaytarget_destroy(struct sw_winsys *ws,
            xlib_dt->tempImage->data = NULL;
      }
      else {
-         FREE(xlib_dt->data);
+         align_free(xlib_dt->data);
         if (xlib_dt->tempImage && xlib_dt->tempImage->data == xlib_dt->data) {
            xlib_dt->tempImage->data = NULL;
         }
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -520,7 +520,7 @@ dri2GetCurrentContext()
   struct glx_context *gc = __glXGetCurrentContext();
   struct dri2_context *dri2Ctx = (struct dri2_context *)gc;

-   return dri2Ctx ? dri2Ctx->driContext : NULL;
+   return (gc != &dummyContext) ? dri2Ctx->driContext : NULL;
 }

 /**
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -127,13 +127,9 @@ static __DRIcontext *
 glx_dri3_get_dri_context(struct loader_dri3_drawable *draw)
 {
   struct glx_context *gc = __glXGetCurrentContext();
+   struct dri3_context *dri3Ctx = (struct dri3_context *) gc;

-   if (gc) {
-      struct dri3_context *dri3Ctx = (struct dri3_context *) gc;
-      return dri3Ctx->driContext;
-   }
-
-   return NULL;
+   return (gc != &dummyContext) ? dri3Ctx->driContext : NULL;
 }

 static void
--- a/src/mapi/glapi/glapi_getproc.c
+++ b/src/mapi/glapi/glapi_getproc.c
@@ -62,12 +62,7 @@ get_static_proc( const char * n )
   GLuint i;
   for (i = 0; static_functions[i].Name_offset >= 0; i++) {
      const char *testName = gl_string_table + static_functions[i].Name_offset;
-#ifdef MANGLE
-      /* skip the prefix on the name */
-      if (strcmp(testName, n + 1) == 0)
-#else
      if (strcmp(testName, n) == 0)
-#endif
      {
 	 return &static_functions[i];
      }
@@ -516,15 +511,14 @@ _glapi_get_proc_address(const char *funcName)

   init_glapi_relocs_once();

-#ifdef MANGLE
-   /* skip the prefix on the name */
-   if (funcName[1] != 'g' || funcName[2] != 'l')
-      return NULL;
-#else
-   if (funcName[0] != 'g' || funcName[1] != 'l')
-      return NULL;
+#ifdef USE_MGL_NAMESPACE
+   if (funcName && funcName[0] == 'm')
+      funcName++;
 #endif

+  if (!funcName || funcName[0] != 'g' || funcName[1] != 'l')
+      return NULL;
+
   /* search extension functions first */
   func = get_extension_proc_address(funcName);
   if (func)
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -376,11 +376,32 @@ driCreateContextAttribs(__DRIscreen *screen, int api,
     *     EGL_CONTEXT_FLAGS_KHR, then a <debug context> will be created.
     *     [...] This bit is supported for OpenGL and OpenGL ES contexts.
     *
-     * None of the other flags have any meaning in an ES context, so this seems safe.
+     * No other EGL_CONTEXT_OPENGL_*_BIT is legal for an ES context.
+     *
+     * However, Mesa's EGL layer translates the context attribute
+     * EGL_CONTEXT_OPENGL_ROBUST_ACCESS into the context flag
+     * __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS.  That attribute is legal for ES
+     * (with EGL 1.5 or EGL_EXT_create_context_robustness) and GL (only with
+     * EGL 1.5).
+     *
+     * From the EGL_EXT_create_context_robustness spec:
+     *
+     *     This extension is written against the OpenGL ES 2.0 Specification
+     *     but can apply to OpenGL ES 1.1 and up.
+     *
+     * From the EGL 1.5 (2014.08.27) spec, p55:
+     *
+     *     If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS attribute is set to
+     *     EGL_TRUE, a context supporting robust buffer access will be created.
+     *     OpenGL contexts must support the GL_ARB_robustness extension, or
+     *     equivalent core API functionality. OpenGL ES contexts must support
+     *     the GL_EXT_robustness extension, or equivalent core API
+     *     functionality.
     */
    if (mesa_api != API_OPENGL_COMPAT
        && mesa_api != API_OPENGL_CORE
-        && (flags & ~__DRI_CTX_FLAG_DEBUG)) {
+        && (flags & ~(__DRI_CTX_FLAG_DEBUG |
+	              __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS))) {
 	*error = __DRI_CTX_ERROR_BAD_FLAG;
 	return NULL;
    }
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3060,12 +3060,18 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
   case nir_texop_txs: op = ir_txs; break;
   case nir_texop_texture_samples: {
      fs_reg dst = retype(get_nir_dest(instr->dest), BRW_REGISTER_TYPE_D);
-      fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, dst,
+
+      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D, 4);
+      fs_inst *inst = bld.emit(SHADER_OPCODE_SAMPLEINFO, tmp,
                               bld.vgrf(BRW_REGISTER_TYPE_D, 1),
                               texture_reg, texture_reg);
      inst->mlen = 1;
      inst->header_size = 1;
      inst->base_mrf = -1;
+      inst->regs_written = 4 * (dispatch_width / 8);
+
+      /* Pick off the one component we care about */
+      bld.MOV(dst, tmp);
      return;
   }
   case nir_texop_samples_identical: op = ir_samples_identical; break;
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -651,6 +651,7 @@ brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb,
      GLubyte *color_mask = ctx->Color.ColorMask[buf];
      for (int i = 0; i < 4; i++) {
         if (_mesa_format_has_color_component(irb->mt->format, i) &&
+             !(i == 3 && irb->Base.Base._BaseFormat == GL_RGB) &&
             !color_mask[i]) {
            perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n",
                       irb->mt->logical_width0, irb->mt->logical_height0);
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -710,7 +710,8 @@ backend_instruction::is_tex() const
           opcode == SHADER_OPCODE_TXS ||
           opcode == SHADER_OPCODE_LOD ||
           opcode == SHADER_OPCODE_TG4 ||
-           opcode == SHADER_OPCODE_TG4_OFFSET);
+           opcode == SHADER_OPCODE_TG4_OFFSET ||
+           opcode == SHADER_OPCODE_SAMPLEINFO);
 }

 bool
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -47,7 +47,18 @@
 static void
 gen7_blorp_emit_urb_config(struct brw_context *brw)
 {
-   unsigned urb_size = (brw->is_haswell && brw->gt == 3) ? 32 : 16;
+   /* URB allocations must be done in 8k chunks. */
+   const unsigned chunk_size_bytes = 8192;
+   const unsigned urb_size =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;
+   const unsigned push_constant_bytes = 1024 * urb_size;
+   const unsigned push_constant_chunks =
+      push_constant_bytes / chunk_size_bytes;
+   const unsigned vs_size = 2;
+   const unsigned vs_start = push_constant_chunks;
+   const unsigned vs_chunks =
+      DIV_ROUND_UP(brw->urb.min_vs_entries * vs_size * 64, chunk_size_bytes);
+
   gen7_emit_push_constant_state(brw,
                                 urb_size / 2 /* vs_size */,
                                 0 /* hs_size */,
@@ -57,17 +68,17 @@ gen7_blorp_emit_urb_config(struct brw_context *brw)

   gen7_emit_urb_state(brw,
                       brw->urb.min_vs_entries /* num_vs_entries */,
-                       2 /* vs_size */,
-                       2 /* vs_start */,
+                       vs_size,
+                       vs_start,
                       0 /* num_hs_entries */,
                       1 /* hs_size */,
-                       2 /* hs_start */,
+                       vs_start + vs_chunks /* hs_start */,
                       0 /* num_ds_entries */,
                       1 /* ds_size */,
-                       2 /* ds_start */,
+                       vs_start + vs_chunks /* ds_start */,
                       0 /* num_gs_entries */,
                       1 /* gs_size */,
-                       2 /* gs_start */);
+                       vs_start + vs_chunks /* gs_start */);
 }


@@ -348,7 +359,7 @@ gen7_blorp_emit_gs_disable(struct brw_context *brw)
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
-   if (!brw->is_haswell && brw->gt == 2 && brw->gs.enabled)
+   if (brw->gen < 8 && !brw->is_haswell && brw->gt == 2 && brw->gs.enabled)
      gen7_emit_cs_stall_flush(brw);

   BEGIN_BATCH(7);
--- a/src/mesa/state_tracker/st_cb_bitmap.c
+++ b/src/mesa/state_tracker/st_cb_bitmap.c
@@ -704,6 +704,10 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,
   st_validate_state(st, ST_PIPELINE_RENDER);

   sv = st_create_texture_sampler_view(pipe, stObj->pt);
+   if (!sv) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCallLists(bitmap text)");
+      return;
+   }

   setup_render_state(ctx, sv, color, true);

@@ -793,6 +797,8 @@ st_DrawAtlasBitmaps(struct gl_context *ctx,

   pipe_resource_reference(&vb.buffer, NULL);

+   pipe_sampler_view_reference(&sv, NULL);
+
   /* We uploaded modified constants, need to invalidate them. */
   st->dirty.mesa |= _NEW_PROGRAM_CONSTANTS;
 }
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -2136,7 +2136,8 @@ st_GetTexSubImage(struct gl_context * ctx,
      goto fallback;
   }

-   if (!stImage->pt || !src) {
+   /* Handle non-finalized textures. */
+   if (!stImage->pt || stImage->pt != stObj->pt || !src) {
      goto fallback;
   }

--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -455,6 +455,8 @@ public:
   virtual void visit(ir_barrier *);
   /*@}*/

+   void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;
+
   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
@@ -1540,10 +1542,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
 void
 glsl_to_tgsi_visitor::visit(ir_expression *ir)
 {
-   unsigned int operand;
   st_src_reg op[ARRAY_SIZE(ir->operands)];
-   st_src_reg result_src;
-   st_dst_reg result_dst;

   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
@@ -1566,7 +1565,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
   if (ir->operation == ir_quadop_vector)
      assert(!"ir_quadop_vector should have been lowered");

-   for (operand = 0; operand < ir->get_num_operands(); operand++) {
+   for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
@@ -1583,6 +1582,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
      assert(!ir->operands[operand]->type->is_matrix());
   }

+   visit_expression(ir, op);
+}
+
+/* The non-recursive part of the expression visitor lives in a separate
+ * function and should be prevented from being inlined, to avoid a stack
+ * explosion when deeply nested expressions are visited.
+ */
+void
+glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
+{
+   st_src_reg result_src;
+   st_dst_reg result_dst;
+
   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
@@ -5268,7 +5280,7 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index,
   case PROGRAM_TEMPORARY:
      /* Allocate space for temporaries on demand. */
      if (index >= t->temps_size) {
-         const int inc = 4096;
+         const int inc = align(index - t->temps_size + 1, 4096);

         t->temps = (struct ureg_dst*)
                    realloc(t->temps,
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -210,6 +210,12 @@ do {                       \
 #define MUST_CHECK
 #endif

+#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+#define ATTRIBUTE_NOINLINE __attribute__((noinline))
+#else
+#define ATTRIBUTE_NOINLINE
+#endif
+
 /** Compute ceiling of integer quotient of A divided by B. */
 #define DIV_ROUND_UP( A, B )  ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
@@ -1 +1 @@
 .2.1
 .2.2