Add release notes for the 10.5.6 release

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 10.5.6
2015-05-23 09:02:41 +01:00 · 2015-05-23 08:58:02 +01:00 · 2015-05-20 22:16:56 +01:00 · 2015-05-20 22:16:48 +01:00 · 2015-05-20 22:16:18 +01:00 · 2015-05-20 22:14:52 +01:00
110 changed files with 1224 additions and 294 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -31,6 +31,7 @@ endif
 endif

 LOCAL_C_INCLUDES += \
+	$(MESA_TOP)/src \
 	$(MESA_TOP)/include

 MESA_VERSION=$(shell cat $(MESA_TOP)/VERSION)
@@ -41,6 +42,19 @@ LOCAL_CFLAGS += \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

 LOCAL_CFLAGS += \
+	-DHAVE___BUILTIN_EXPECT \
+	-DHAVE___BUILTIN_FFS \
+	-DHAVE___BUILTIN_FFSLL \
+	-DHAVE_FUNC_ATTRIBUTE_FLATTEN \
+	-DHAVE_FUNC_ATTRIBUTE_UNUSED \
+	-DHAVE_FUNC_ATTRIBUTE_FORMAT \
+	-DHAVE_FUNC_ATTRIBUTE_PACKED \
+	-DHAVE___BUILTIN_CTZ \
+	-DHAVE___BUILTIN_POPCOUNT \
+	-DHAVE___BUILTIN_POPCOUNTLL \
+	-DHAVE___BUILTIN_CLZ \
+	-DHAVE___BUILTIN_CLZLL \
+	-DHAVE___BUILTIN_UNREACHABLE \
 	-DHAVE_PTHREAD=1 \
 	-fvisibility=hidden \
 	-Wno-sign-compare
--- a/Makefile.am
+++ b/Makefile.am
@@ -49,6 +49,7 @@ noinst_HEADERS =					\
 	include/c99					\
 	include/c11					\
 	include/D3D9					\
+	include/VG					\
 	include/HaikuGL					\
 	include/pci_ids

--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-10.5.2
+10.5.6
--- a/configure.ac
+++ b/configure.ac
@@ -115,8 +115,17 @@ if test "x$INDENT" != "xcat"; then
 fi

 AX_CHECK_PYTHON_MAKO_MODULE($PYTHON_MAKO_REQUIRED)
-if test -n "$PYTHON2" -a "x$acv_mako_found" != "xyes"; then
-    AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found])
+
+if test -z "$PYTHON2"; then
+    if test ! -f "$srcdir/src/util/format_srgb.c"; then
+        AC_MSG_ERROR([Python not found - unable to generate sources])
+    fi
+else
+    if test "x$acv_mako_found" = xno; then
+        if test ! -f "$srcdir/src/mesa/main/format_unpack.c"; then
+            AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found])
+        fi
+    fi
 fi

 AC_PROG_INSTALL
@@ -600,6 +609,7 @@ if test "x$enable_asm" = xyes; then
 fi

 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
+AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])

 dnl Check to see if dlopen is in default libraries (like Solaris, which
--- a/docs/relnotes/10.5.2.html
+++ b/docs/relnotes/10.5.2.html
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+755220e160a9f22fda0dffd47746f997b6e196d03f8edc390df7793aecaaa541  mesa-10.5.2.tar.gz
+2f4b6fb77c3e7d6f861558d0884a3073f575e1e673dad8d1b0624e78e9c4dd44  mesa-10.5.2.tar.xz
 </pre>


--- a/docs/relnotes/10.5.3.html
+++ b/docs/relnotes/10.5.3.html
@@ -0,0 +1,125 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.3 Release Notes / April 12, 2015</h1>
+
+<p>
+Mesa 10.5.3 is a bug fix release which fixes bugs found since the 10.5.2 release.
+</p>
+<p>
+Mesa 10.5.3 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+2371b8e210ccd19f61dd94b6664d612e5a479ba7d431a074512d87633bd6aeb4  mesa-10.5.3.tar.gz
+8701ee1be4f5c03238f5e63c1a9bd4cc03a2f6c0155ed42a1ae7d58f18912ba2  mesa-10.5.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83962">Bug 83962</a> - [HSW/BYT]Piglit spec_ARB_gpu_shader5_arb_gpu_shader5-emitstreamvertex_nodraw fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89679">Bug 89679</a> - [NV50] Portal/Half-Life 2 will not start (native Steam)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89746">Bug 89746</a> - Mesa and LLVM 3.6+ break opengl for genymotion</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89754">Bug 89754</a> - vertexAttrib fails WebGL Conformance test with mesa drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89758">Bug 89758</a> - pow WebGL Conformance test with mesa drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89759">Bug 89759</a> - WebGL OGL ES GLSL conformance test with mesa drivers fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89905">Bug 89905</a> - scons build broken on 10.5.2 due to activated vega st</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>st_glsl_to_tgsi: only do mov copy propagation on temps (v2)</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.2 release</li>
+  <li>xmlpool: don't forget to ship the MOS</li>
+  <li>configure.ac: error out if python/mako is not found when required</li>
+  <li>dist: add the VG dependencies into the tarball</li>
+  <li>Update version to 10.5.3</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>i965: Do not render primitives in non-zero streams when TF is disabled</li>
+</ul>
+
+<p>Ilia Mirkin (7):</p>
+<ul>
+  <li>st/mesa: update arrays when the current attrib has been updated</li>
+  <li>nv50/ir: take postFactor into account when doing peephole optimizations</li>
+  <li>nv50/ir/gk110: fix offset flag position for TXD opcode</li>
+  <li>freedreno/a3xx: fix 3d texture layout</li>
+  <li>freedreno/a3xx: point size should not be divided by 2</li>
+  <li>nv50: allocate more offset space for occlusion queries</li>
+  <li>nv50,nvc0: limit the y-tiling of 3d textures to the first level's tiling</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Fix instanced geometry shaders on Gen8+.</li>
+  <li>i965: Add forgotten multi-stream code to Gen8 SOL state.</li>
+</ul>
+
+<p>Marcin Ślusarz (1):</p>
+<ul>
+  <li>nouveau: synchronize "scratch runout" destruction with the command stream</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>radeonsi: Cache LLVMTargetMachineRef in context instead of in screen</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>clover: Return CL_BUILD_ERROR for CL_PROGRAM_BUILD_STATUS when compilation fails v2</li>
+</ul>
+
+<p>Ville Syrjälä (1):</p>
+<ul>
+  <li>i965: Fix URB size for CHV</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.5.4.html
+++ b/docs/relnotes/10.5.4.html
@@ -0,0 +1,125 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.4 Release Notes / April 24, 2015</h1>
+
+<p>
+Mesa 10.5.4 is a bug fix release which fixes bugs found since the 10.5.3 release.
+</p>
+<p>
+Mesa 10.5.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+e1089567fc7bf8d9b2d8badcc9f2fc3b758701c8c0ccfe7af1805549fea53f11  mesa-10.5.4.tar.gz
+b51e723f3a20d842c88a92d809435b229fc4744ca0dbec0317d9d4a3ac4c6803  mesa-10.5.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69226">Bug 69226</a> - Cannot enable basic shaders with Second Life aborts attempt</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71591">Bug 71591</a> - Second Life shaders fail to compile (extension declared in middle of shader)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81025">Bug 81025</a> - [IVB/BYT Bisected]Piglit spec_ARB_draw_indirect_arb_draw_indirect-draw-elements-prim-restart-ugly fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89457">Bug 89457</a> - [BSW Bisected]ogles3conform ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89957">Bug 89957</a> - vm protection faults in piglit test: texsubimage cube_map_array pbo</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>glsl: rewrite glsl_type::record_key_hash() to avoid buffer overflow</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>st/mesa: convert sub image for cube map arrays to 2d arrays for upload</li>
+  <li>st/mesa: align cube map arrays layers</li>
+</ul>
+
+<p>Emil Velikov (11):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.3 release</li>
+  <li>radeonsi: remove unused si_dump_key()</li>
+  <li>android: use LOCAL_SHARED_LIBRARIES over TARGET_OUT_HEADERS</li>
+  <li>android: add $(mesa_top)/src include to the whole of mesa</li>
+  <li>android: egl: add libsync_cflags to the build</li>
+  <li>android: dri/common: conditionally include drm_cflags/set __NOT_HAVE_DRM_H</li>
+  <li>android: add HAVE__BUILTIN_* and HAVE_FUNC_ATTRIBUTE_* defines</li>
+  <li>android: add $(mesa_top)/src/mesa/main to the includes list</li>
+  <li>android: dri: link against libmesa_util</li>
+  <li>android: mesa: fix the path of the SSE4_1 optimisations</li>
+  <li>Update version to 10.5.4</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>nir: Fix typo in "ushr by 0" algebraic replacement</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Fix software primitive restart with indirect draws.</li>
+  <li>drirc: Add "Second Life" quirk (allow_glsl_extension_directive_midshader).</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>i965: Rewrite ir_tex to ir_txl with lod 0 for vertex shaders</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>glsl_to_tgsi: fix out-of-bounds constant access and crash for uniforms</li>
+  <li>glsl_to_tgsi: don't use a potentially-undefined immediate for ir_query_levels</li>
+</ul>
+
+<p>Mathias Froehlich (1):</p>
+<ul>
+  <li>i965: Flush batchbuffer containing the query on glQueryCounter.</li>
+</ul>
+
+<p>Mauro Rossi (2):</p>
+<ul>
+  <li>android: mesa: generate the format_{un,}pack.[ch] sources</li>
+  <li>android: add initial NIR build</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.5.5.html
+++ b/docs/relnotes/10.5.5.html
@@ -0,0 +1,95 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.5 Release Notes / May 11, 2015</h1>
+
+<p>
+Mesa 10.5.5 is a bug fix release which fixes bugs found since the 10.5.4 release.
+</p>
+<p>
+Mesa 10.5.5 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c10f00fd792b8290dd51ebcc48a9016c4cafab19ec205423c6fcadfd7f3a59f2  mesa-10.5.5.tar.gz
+4ac4e4ea3414f1cadb1467f2f173f9e56170d31e8674f7953a46f0549d319f28  mesa-10.5.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=88521">Bug 88521</a> - GLBenchmark 2.7 TRex renders with artifacts on Gen8 with !UXA</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89455">Bug 89455</a> - [NVC0/Gallium] Unigine Heaven black and white boxes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89689">Bug 89689</a> - [Regression] Weston on DRM backend won't start with new version of mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90130">Bug 90130</a> - gl_PrimitiveId seems to reset at 340</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Boyan Ding (1):</p>
+<ul>
+  <li>i965: Add XRGB8888 format to intel_screen_make_configs</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.4 release</li>
+  <li>r300: do not link against libdrm_intel</li>
+  <li>Update version to 10.5.5</li>
+</ul>
+
+<p>Ilia Mirkin (4):</p>
+<ul>
+  <li>nvc0/ir: flush denorms to zero in non-compute shaders</li>
+  <li>gk110/ir: fix set with a register dest to not auto-set the abs flag</li>
+  <li>nvc0/ir: fix predicated PFETCH emission</li>
+  <li>nv50/ir: fix asFlow() const helper for OP_JOIN</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Make intel_emit_linear_blit handle Gen8+ alignment restrictions.</li>
+  <li>i965: Disallow linear blits that are not cacheline aligned.</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>draw: fix prim ids when there's no gs</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.5.6.html
+++ b/docs/relnotes/10.5.6.html
@@ -0,0 +1,146 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.6 Release Notes / May 23, 2015</h1>
+
+<p>
+Mesa 10.5.6 is a bug fix release which fixes bugs found since the 10.5.5 release.
+</p>
+<p>
+Mesa 10.5.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86792">Bug 86792</a> - [NVC0] Portal 2 Crashes in Wine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90147">Bug 90147</a> - swrast: build error undeclared _SC_PHYS_PAGES on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90350">Bug 90350</a> - [G96] Portal's portals are incorrectly rendered</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90363">Bug 90363</a> - [nv50] HW state is not reset correctly when using a new GL context</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new bonaire pci id</li>
+</ul>
+
+<p>Axel Davy (2):</p>
+<ul>
+  <li>egl/wayland: properly destroy wayland objects</li>
+  <li>glx/dri3: Add additional check for gpu offloading case</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.5 release</li>
+  <li>egl/main: fix EGL_KHR_get_all_proc_addresses</li>
+  <li>targets/osmesa: drop the -module tag from LDFLAGS</li>
+  <li>Update version to 10.5.6</li>
+</ul>
+
+<p>Francisco Jerez (4):</p>
+<ul>
+  <li>clover: Refactor event::trigger and ::abort to prevent deadlock and reentrancy issues.</li>
+  <li>clover: Wrap event::_status in a method to prevent unlocked access.</li>
+  <li>clover: Implement locking of the wait_count, _chain and _status members of event.</li>
+  <li>i965: Fix PBO cache coherency issue after _mesa_meta_pbo_GetTexSubImage().</li>
+</ul>
+
+<p>Fredrik Höglund (2):</p>
+<ul>
+  <li>main: Require that the texture exists in framebuffer_texture</li>
+  <li>mesa: Generate GL_INVALID_VALUE in framebuffer_texture when layer &lt; 0</li>
+</ul>
+
+<p>Ilia Mirkin (7):</p>
+<ul>
+  <li>nv50/ir: only propagate saturate up if some actual folding took place</li>
+  <li>nv50: keep track of PGRAPH state in nv50_screen</li>
+  <li>nvc0: keep track of PGRAPH state in nvc0_screen</li>
+  <li>nvc0: reset the instanced elements state when doing blit using 3d engine</li>
+  <li>nv50/ir: only enable mul saturate on G200+</li>
+  <li>st/mesa: make sure to create a "clean" bool when doing i2b</li>
+  <li>nvc0: switch mechanism for shader eviction to be a while loop</li>
+</ul>
+
+<p>Jeremy Huddleston Sequoia (2):</p>
+<ul>
+  <li>swrast: Build fix for darwin</li>
+  <li>darwin: Fix install name of libOSMesa</li>
+</ul>
+
+<p>Laura Ekstrand (2):</p>
+<ul>
+  <li>main: Fix an error generated by FramebufferTexture</li>
+  <li>main: Complete error conditions for glInvalidate*Framebuffer.</li>
+</ul>
+
+<p>Marta Lofstedt (1):</p>
+<ul>
+  <li>main: glGetIntegeri_v fails for GL_VERTEX_BINDING_STRIDE</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>freedreno: enable a306</li>
+  <li>freedreno: fix bug in tile/slot calculation</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>draw: (trivial) fix out-of-bounds vector initialization</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>mesa: fix shininess check for ffvertex_prog v2</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>clover: Add a mutex to guard queue::queued_events</li>
+  <li>clover: Fix a bug with multi-threaded events v2</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -85,6 +85,7 @@ CHIPSET(0x6651, BONAIRE_6651, BONAIRE)
 CHIPSET(0x6658, BONAIRE_6658, BONAIRE)
 CHIPSET(0x665C, BONAIRE_665C, BONAIRE)
 CHIPSET(0x665D, BONAIRE_665D, BONAIRE)
+CHIPSET(0x665F, BONAIRE_665F, BONAIRE)

 CHIPSET(0x9830, KABINI_9830, KABINI)
 CHIPSET(0x9831, KABINI_9831, KABINI)
--- a/src/egl/drivers/dri2/Android.mk
+++ b/src/egl/drivers/dri2/Android.mk
@@ -40,12 +40,18 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/egl/main \
 	$(MESA_TOP)/src/loader \
-	$(TARGET_OUT_HEADERS)/libdrm \
 	$(DRM_GRALLOC_TOP)

 LOCAL_STATIC_LIBRARIES := \
 	libmesa_loader

+LOCAL_SHARED_LIBRARIES := libdrm
+
+ifeq ($(shell echo "$(MESA_ANDROID_VERSION) >= 4.2" | bc),1)
+LOCAL_SHARED_LIBRARIES += \
+	libsync
+endif
+
 LOCAL_MODULE := libmesa_egl_dri2

 include $(MESA_COMMON_MK)
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -703,6 +703,8 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
 #ifdef HAVE_WAYLAND_PLATFORM
   case _EGL_PLATFORM_WAYLAND:
      wl_drm_destroy(dri2_dpy->wl_drm);
+      wl_registry_destroy(dri2_dpy->wl_registry);
+      wl_event_queue_destroy(dri2_dpy->wl_queue);
      if (dri2_dpy->own_device) {
         wl_display_disconnect(dri2_dpy->wl_dpy);
      }
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1028,7 +1028,7 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)
   wl_registry_add_listener(dri2_dpy->wl_registry,
                            &registry_listener, dri2_dpy);
   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_drm == NULL)
-      goto cleanup_dpy;
+      goto cleanup_registry;

   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->fd == -1)
      goto cleanup_drm;
@@ -1111,6 +1111,9 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)
 cleanup_drm:
   free(dri2_dpy->device_name);
   wl_drm_destroy(dri2_dpy->wl_drm);
+ cleanup_registry:
+   wl_registry_destroy(dri2_dpy->wl_registry);
+   wl_event_queue_destroy(dri2_dpy->wl_queue);
 cleanup_dpy:
   free(dri2_dpy);
   
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1029,8 +1029,9 @@ eglGetProcAddress(const char *procname)
      const char *name;
      _EGLProc function;
   } egl_functions[] = {
-      /* core functions should not be queryable, but, well... */
-#ifdef _EGL_GET_CORE_ADDRESSES
+      /* core functions queryable in the presence of
+       * EGL_KHR_get_all_proc_addresses or EGL 1.5
+       */
      /* alphabetical order */
      { "eglBindAPI", (_EGLProc) eglBindAPI },
      { "eglBindTexImage", (_EGLProc) eglBindTexImage },
@@ -1066,7 +1067,6 @@ eglGetProcAddress(const char *procname)
      { "eglWaitClient", (_EGLProc) eglWaitClient },
      { "eglWaitGL", (_EGLProc) eglWaitGL },
      { "eglWaitNative", (_EGLProc) eglWaitNative },
-#endif /* _EGL_GET_CORE_ADDRESSES */
 #ifdef EGL_MESA_screen_surface
      { "eglChooseModeMESA", (_EGLProc) eglChooseModeMESA },
      { "eglGetModesMESA", (_EGLProc) eglGetModesMESA },
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -33,8 +33,7 @@ LOCAL_SRC_FILES := \
 	$(VL_STUB_SOURCES)

 LOCAL_C_INCLUDES := \
-	$(GALLIUM_TOP)/auxiliary/util \
-	$(MESA_TOP)/src
+	$(GALLIUM_TOP)/auxiliary/util

 LOCAL_MODULE := libmesa_gallium

--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -182,6 +182,7 @@ boolean draw_init(struct draw_context *draw)
 void draw_new_instance(struct draw_context *draw)
 {
   draw_geometry_shader_new_instance(draw->gs.geometry_shader);
+   draw_prim_assembler_new_instance(draw->ia);
 }


--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -2049,7 +2049,7 @@ generate_mask_value(struct draw_gs_llvm_variant *variant,

   num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
                                  variant->num_prims);
-   for (i = 0; i <= gs_type.length; i++) {
+   for (i = 0; i < gs_type.length; i++) {
      LLVMValueRef idx = lp_build_const_int32(gallivm, i);
      mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
   }
--- a/src/gallium/auxiliary/draw/draw_prim_assembler.c
+++ b/src/gallium/auxiliary/draw/draw_prim_assembler.c
@@ -189,7 +189,6 @@ draw_prim_assembler_prepare_outputs(struct draw_assembler *ia)
   } else {
      ia->primid_slot = -1;
   }
-   ia->primid = 0;
 }


@@ -233,7 +232,6 @@ draw_prim_assembler_run(struct draw_context *draw,
   asmblr->input_prims = input_prims;
   asmblr->input_verts = input_verts;
   asmblr->needs_primid = needs_primid(asmblr->draw);
-   asmblr->primid = 0;
   asmblr->num_prims = 0;

   output_prims->linear = TRUE;
@@ -284,3 +282,14 @@ draw_prim_assembler_destroy(struct draw_assembler *ia)
 {
   FREE(ia);
 }
+
+
+/*
+ * Called at the very beginning of the draw call with a new instance.
+ * Used to reset state that should persist across primitive restarts.
+ */
+void
+draw_prim_assembler_new_instance(struct draw_assembler *asmblr)
+{
+   asmblr->primid = 0;
+}
--- a/src/gallium/auxiliary/draw/draw_prim_assembler.h
+++ b/src/gallium/auxiliary/draw/draw_prim_assembler.h
@@ -70,5 +70,8 @@ draw_prim_assembler_run(struct draw_context *draw,
 void
 draw_prim_assembler_prepare_outputs(struct draw_assembler *ia);

+void
+draw_prim_assembler_new_instance(struct draw_assembler *ia);
+

 #endif
--- a/src/gallium/drivers/freedreno/Android.mk
+++ b/src/gallium/drivers/freedreno/Android.mk
@@ -34,10 +34,9 @@ LOCAL_CFLAGS := \
 	-Wno-packed-bitfield-compat

 LOCAL_C_INCLUDES := \
-	$(LOCAL_PATH)/ir3 \
-	$(TARGET_OUT_HEADERS)/libdrm \
-	$(TARGET_OUT_HEADERS)/freedreno
+	$(LOCAL_PATH)/ir3

+LOCAL_SHARED_LIBRARIES := libdrm libdrm_freedreno
 LOCAL_MODULE := libmesa_pipe_freedreno

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
@@ -50,7 +50,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,

 	if (cso->point_size_per_vertex) {
 		psize_min = util_get_min_point_size(cso);
-		psize_max = 8192;
+		psize_max = 4092;
 	} else {
 		/* Force the point size to be as if the vertex output was disabled. */
 		psize_min = cso->point_size;
@@ -67,9 +67,9 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
 */
 	so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
 	so->gras_su_point_minmax =
-			A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
-			A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
-	so->gras_su_point_size   = A3XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+			A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
+			A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
+	so->gras_su_point_size   = A3XX_GRAS_SU_POINT_SIZE(cso->point_size);
 	so->gras_su_poly_offset_scale =
 			A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
 	so->gras_su_poly_offset_offset =
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -212,6 +212,7 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	struct fd_resource *rsc = fd_resource(prsc);
 	unsigned lvl = cso->u.tex.first_level;
 	unsigned miplevels = cso->u.tex.last_level - lvl;
+	uint32_t sz2 = 0;

 	if (!so)
 		return NULL;
@@ -252,8 +253,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	case PIPE_TEXTURE_3D:
 		so->texconst3 =
 				A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
-				A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0) |
-				A3XX_TEX_CONST_3_LAYERSZ2(rsc->slices[0].size0);
+				A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[lvl].size0);
+		while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
+			sz2 = rsc->slices[++lvl].size0;
+		so->texconst3 |= A3XX_TEX_CONST_3_LAYERSZ2(sz2);
 		break;
 	default:
 		so->texconst3 = 0x00000000;
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -293,7 +293,7 @@ struct fd_context {
 	 */
 	struct fd_gmem_stateobj gmem;
 	struct fd_vsc_pipe      pipe[8];
-	struct fd_tile          tile[64];
+	struct fd_tile          tile[256];

 	/* which state objects need to be re-emit'd: */
 	enum {
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -91,6 +91,7 @@ calculate_tiles(struct fd_context *ctx)
 	uint32_t i, j, t, xoff, yoff;
 	uint32_t tpp_x, tpp_y;
 	bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
+	int tile_n[ARRAY_SIZE(ctx->pipe)];

 	if (pfb->cbufs[0])
 		cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
@@ -213,6 +214,7 @@ calculate_tiles(struct fd_context *ctx)
 	/* configure tiles: */
 	t = 0;
 	yoff = miny;
+	memset(tile_n, 0, sizeof(tile_n));
 	for (i = 0; i < nbins_y; i++) {
 		uint32_t bw, bh;

@@ -223,20 +225,17 @@ calculate_tiles(struct fd_context *ctx)

 		for (j = 0; j < nbins_x; j++) {
 			struct fd_tile *tile = &ctx->tile[t];
-			uint32_t n, p;
+			uint32_t p;

 			assert(t < ARRAY_SIZE(ctx->tile));

 			/* pipe number: */
 			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);

-			/* slot number: */
-			n = ((i % tpp_y) * tpp_x) + (j % tpp_x);
-
 			/* clip bin width: */
 			bw = MIN2(bin_w, minx + width - xoff);

-			tile->n = n;
+			tile->n = tile_n[p]++;
 			tile->p = p;
 			tile->bin_w = bw;
 			tile->bin_h = bh;
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -215,14 +215,20 @@ setup_slices(struct fd_resource *rsc, uint32_t alignment)

 		slice->pitch = width = align(width, 32);
 		slice->offset = size;
-		/* 1d array, 2d array, 3d textures (but not cube!) must all have the
-		 * same layer size for each miplevel on a3xx. These are also the
-		 * targets that have non-1 alignment.
+		/* 1d array and 2d array textures must all have the same layer size
+		 * for each miplevel on a3xx. 3d textures can have different layer
+		 * sizes for high levels, but the hw auto-sizer is buggy (or at least
+		 * different than what this code does), so as soon as the layer size
+		 * range gets into range, we stop reducing it.
 		 */
-		if (level == 0 || layers_in_level == 1 || alignment == 1)
+		if (prsc->target == PIPE_TEXTURE_3D && (
+					level == 1 ||
+					(level > 1 && rsc->slices[level - 1].size0 > 0xf000)))
+			slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
+		else if (level == 0 || rsc->layer_first || alignment == 1)
 			slice->size0 = align(slice->pitch * height * rsc->cpp, alignment);
 		else
-			slice->size0 = rsc->slices[0].size0;
+			slice->size0 = rsc->slices[level - 1].size0;

 		size += slice->size0 * depth * layers_in_level;

--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -314,7 +314,7 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
 	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
 	case PIPE_CAPF_MAX_POINT_WIDTH:
 	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-		return 8192.0f;
+		return 4092.0f;
 	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
 		return 16.0f;
 	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
@@ -531,6 +531,7 @@ fd_screen_create(struct fd_device *dev)
 	case 220:
 		fd2_screen_init(pscreen);
 		break;
+	case 307:
 	case 320:
 	case 330:
 		fd3_screen_init(pscreen);
--- a/src/gallium/drivers/nouveau/Android.mk
+++ b/src/gallium/drivers/nouveau/Android.mk
@@ -36,9 +36,7 @@ LOCAL_SRC_FILES := \
 	$(NVC0_CODEGEN_SOURCES) \
 	$(NVC0_C_SOURCES)

-LOCAL_C_INCLUDES := \
-	$(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_nouveau
 LOCAL_MODULE := libmesa_pipe_nouveau

 include external/stlport/libstlport.mk
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -903,7 +903,7 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i)
      code[0] |= 0x1c;
   } else {
      switch (i->sType) {
-      case TYPE_F32: op2 = 0x000; op1 = 0x820; break;
+      case TYPE_F32: op2 = 0x000; op1 = 0x800; break;
      case TYPE_F64: op2 = 0x080; op1 = 0x900; break;
      default:
         op2 = 0x1a8;
@@ -1116,6 +1116,7 @@ CodeEmitterGK110::emitTEX(const TexInstruction *i)
   if (i->tex.useOffsets == 1) {
      switch (i->op) {
      case OP_TXF: code[1] |= 0x200; break;
+      case OP_TXD: code[1] |= 0x00400000; break;
      default: code[1] |= 0x800; break;
      }
   }
@@ -1264,8 +1265,10 @@ CodeEmitterGK110::emitPFETCH(const Instruction *i)

   emitPredicate(i);

+   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
+
   defId(i->def(0), 2);
-   srcId(i->src(1), 10);
+   srcId(i, src1, 10);
 }

 void
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -1441,8 +1441,10 @@ CodeEmitterNVC0::emitPFETCH(const Instruction *i)

   emitPredicate(i);

+   const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2)
+
   defId(i->def(0), 14);
-   srcId(i->src(1), 20);
+   srcId(i, src1, 20);
 }

 void
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
@@ -302,7 +302,7 @@ FlowInstruction *Instruction::asFlow()

 const FlowInstruction *Instruction::asFlow() const
 {
-   if (op >= OP_BRA && op <= OP_JOINAT)
+   if (op >= OP_BRA && op <= OP_JOIN)
      return static_cast<const FlowInstruction *>(this);
   return NULL;
 }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -73,6 +73,26 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
   // TODO
 }

+// Set the flush-to-zero flag on eligible 32-bit float instructions.
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+   // Skip non-F32 sources, and instructions whose dnz flag is already set.
+   if (i->sType != TYPE_F32 || i->dnz)
+      return;
+
+   // Only arithmetic, comparison and conversion operations can flush.
+   switch (prog->getTarget()->getOpClass(i->op)) {
+   case OPCLASS_ARITH:
+   case OPCLASS_COMPARE:
+   case OPCLASS_CONVERT:
+      i->ftz = true;
+      break;
+   default:
+      break;
+   }
+}
+
 bool
 NVC0LegalizeSSA::visit(Function *fn)
 {
@@ -86,8 +106,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
   Instruction *next;
   for (Instruction *i = bb->getEntry(); i; i = next) {
      next = i->next;
-      if (i->dType == TYPE_F32)
+      if (i->dType == TYPE_F32) {
+         if (prog->getType() != Program::TYPE_COMPUTE)
+            handleFTZ(i);
         continue;
+      }
      switch (i->op) {
      case OP_DIV:
      case OP_MOD:
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -36,6 +36,7 @@ private:
   // we want to insert calls to the builtin library only after optimization
   void handleDIV(Instruction *); // integer division, modulus
   void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
+   void handleFTZ(Instruction *);

 private:
   BuildUtil bld;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -422,7 +422,9 @@ ConstantFolding::expr(Instruction *i,
            b->data.f32 = 0.0f;
      }
      switch (i->dType) {
-      case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
+      case TYPE_F32:
+         res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor);
+         break;
      case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
      case TYPE_S32:
         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
@@ -550,6 +552,7 @@ ConstantFolding::expr(Instruction *i,

   i->src(0).mod = Modifier(0);
   i->src(1).mod = Modifier(0);
+   i->postFactor = 0;

   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
   i->setSrc(1, NULL);
@@ -653,7 +656,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
   Instruction *insn;
   Instruction *mul1 = NULL; // mul1 before mul2
   int e = 0;
-   float f = imm2.reg.data.f32;
+   float f = imm2.reg.data.f32 * exp2f(mul2->postFactor);
   ImmediateValue imm1;

   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
@@ -673,6 +676,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
            mul1->src(s1).mod = Modifier(0);
            mul2->def(0).replace(mul1->getDef(0), false);
+            mul1->saturate = mul2->saturate;
         } else
         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
            // c = mul a, b
@@ -681,8 +685,8 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
            mul2->def(0).replace(mul1->getDef(0), false);
            if (f < 0)
               mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
+            mul1->saturate = mul2->saturate;
         }
-         mul1->saturate = mul2->saturate;
         return;
      }
   }
@@ -753,9 +757,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
         i->op = OP_MOV;
         i->setSrc(0, new_ImmediateValue(prog, 0u));
         i->src(0).mod = Modifier(0);
+         i->postFactor = 0;
         i->setSrc(1, NULL);
      } else
-      if (imm0.isInteger(1) || imm0.isInteger(-1)) {
+      if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) {
         if (imm0.isNegative())
            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
         i->op = i->src(t).mod.getOp();
@@ -768,7 +773,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
            i->src(0).mod = 0;
         i->setSrc(1, NULL);
      } else
-      if (imm0.isInteger(2) || imm0.isInteger(-2)) {
+      if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) {
         if (imm0.isNegative())
            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
         i->op = OP_ADD;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -84,7 +84,7 @@ static const struct opProperties _initProps[] =
   //           neg  abs  not  sat  c[]  s[], a[], imm
   { OP_ADD,    0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
   { OP_SUB,    0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
-   { OP_MUL,    0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
+   { OP_MUL,    0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
   { OP_MAX,    0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
   { OP_MIN,    0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
   { OP_MAD,    0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint
@@ -188,6 +188,9 @@ void TargetNV50::initOpInfo()
      if (prop->mSat & 8)
         opInfo[prop->op].dstMods = NV50_IR_MOD_SAT;
   }
+
+   if (chipset >= 0xa0)
+      opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT;
 }

 unsigned int
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -846,17 +846,28 @@ nouveau_scratch_bo_alloc(struct nouveau_context *nv, struct nouveau_bo **pbo,
                         4096, size, NULL, pbo);
 }

+/* Fence work callback: unreference every bo in runout list d, then free it. */
+static void
+nouveau_scratch_unref_bos(void *d)
+{
+   struct runout *list = d;
+   unsigned i; /* same type as list->nr, avoids a signed/unsigned compare */
+
+   for (i = 0; i < list->nr; ++i)
+      nouveau_bo_ref(NULL, &list->bo[i]);
+   FREE(list);
+}
+
 void
 nouveau_scratch_runout_release(struct nouveau_context *nv)
 {
-   if (!nv->scratch.nr_runout)
+   if (!nv->scratch.runout) /* no runout list allocated: nothing to release */
+      return;
+
+   if (!nouveau_fence_work(nv->screen->fence.current, nouveau_scratch_unref_bos,
+         nv->scratch.runout)) /* on a false return, keep the list attached */
       return;
-   do {
-      --nv->scratch.nr_runout;
-      nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]);
-   } while (nv->scratch.nr_runout);

-   FREE(nv->scratch.runout);
    nv->scratch.end = 0;
    nv->scratch.runout = NULL;
 }
@@ -868,21 +879,26 @@ static INLINE boolean
 nouveau_scratch_runout(struct nouveau_context *nv, unsigned size)
 {
   int ret;
-   const unsigned n = nv->scratch.nr_runout++;
+   unsigned n;

-   nv->scratch.runout = REALLOC(nv->scratch.runout,
-                                (n + 0) * sizeof(*nv->scratch.runout),
-                                (n + 1) * sizeof(*nv->scratch.runout));
-   nv->scratch.runout[n] = NULL;
+   if (nv->scratch.runout)
+      n = nv->scratch.runout->nr;
+   else
+      n = 0;
+   nv->scratch.runout = REALLOC(nv->scratch.runout, n == 0 ? 0 :
+                                (sizeof(*nv->scratch.runout) + (n + 0) * sizeof(void *)),
+                                 sizeof(*nv->scratch.runout) + (n + 1) * sizeof(void *));
+   nv->scratch.runout->nr = n + 1;
+   nv->scratch.runout->bo[n] = NULL;

-   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout[n], size);
+   ret = nouveau_scratch_bo_alloc(nv, &nv->scratch.runout->bo[n], size);
   if (!ret) {
-      ret = nouveau_bo_map(nv->scratch.runout[n], 0, NULL);
+      ret = nouveau_bo_map(nv->scratch.runout->bo[n], 0, NULL);
      if (ret)
-         nouveau_bo_ref(NULL, &nv->scratch.runout[--nv->scratch.nr_runout]);
+         nouveau_bo_ref(NULL, &nv->scratch.runout->bo[--nv->scratch.runout->nr]);
   }
   if (!ret) {
-      nv->scratch.current = nv->scratch.runout[n];
+      nv->scratch.current = nv->scratch.runout->bo[n];
      nv->scratch.offset = 0;
      nv->scratch.end = size;
      nv->scratch.map = nv->scratch.current->map;
--- a/src/gallium/drivers/nouveau/nouveau_context.h
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -40,8 +40,10 @@ struct nouveau_context {
      unsigned end;
      struct nouveau_bo *bo[NOUVEAU_MAX_SCRATCH_BUFS];
      struct nouveau_bo *current;
-      struct nouveau_bo **runout;
-      unsigned nr_runout;
+      struct runout {
+         unsigned nr;
+         struct nouveau_bo *bo[0];
+      } *runout;
      unsigned bo_size;
   } scratch;

@@ -71,7 +73,7 @@ static INLINE void
 nouveau_scratch_done(struct nouveau_context *nv)
 {
   nv->scratch.wrap = nv->scratch.id;
-   if (unlikely(nv->scratch.nr_runout))
+   if (unlikely(nv->scratch.runout))
      nouveau_scratch_runout_release(nv);
 }

--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -138,8 +138,11 @@ nv50_destroy(struct pipe_context *pipe)
 {
   struct nv50_context *nv50 = nv50_context(pipe);

-   if (nv50_context_screen(nv50)->cur_ctx == nv50)
-      nv50_context_screen(nv50)->cur_ctx = NULL;
+   if (nv50->screen->cur_ctx == nv50) {
+      nv50->screen->cur_ctx = NULL;
+      /* Save off the state in case another context gets created */
+      nv50->screen->save_state = nv50->state;
+   }
   nouveau_pushbuf_bufctx(nv50->base.pushbuf, NULL);
   nouveau_pushbuf_kick(nv50->base.pushbuf, nv50->base.pushbuf->channel);

@@ -290,6 +293,10 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
   pipe->get_sample_position = nv50_context_get_sample_position;

   if (!screen->cur_ctx) {
+      /* Restore the last context's state here, normally handled during
+       * context switch
+       */
+      nv50->state = screen->save_state;
      screen->cur_ctx = nv50;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -104,28 +104,7 @@ struct nv50_context {
   uint32_t dirty;
   boolean cb_dirty;

-   struct {
-      uint32_t instance_elts; /* bitmask of per-instance elements */
-      uint32_t instance_base;
-      uint32_t interpolant_ctrl;
-      uint32_t semantic_color;
-      uint32_t semantic_psize;
-      int32_t index_bias;
-      boolean uniform_buffer_bound[3];
-      boolean prim_restart;
-      boolean point_sprite;
-      boolean rt_serialize;
-      boolean flushed;
-      boolean rasterizer_discard;
-      uint8_t tls_required;
-      boolean new_tls_space;
-      uint8_t num_vtxbufs;
-      uint8_t num_vtxelts;
-      uint8_t num_textures[3];
-      uint8_t num_samplers[3];
-      uint8_t prim_size;
-      uint16_t scissor;
-   } state;
+   struct nv50_graph_state state;

   struct nv50_blend_stateobj *blend;
   struct nv50_rasterizer_stateobj *rast;
@@ -191,12 +170,6 @@ nv50_context(struct pipe_context *pipe)
   return (struct nv50_context *)pipe;
 }

-static INLINE struct nv50_screen *
-nv50_context_screen(struct nv50_context *nv50)
-{
-   return nv50_screen(&nv50->base.screen->base);
-}
-
 /* return index used in nv50_context arrays for a specific shader type */
 static INLINE unsigned
 nv50_context_shader_stage(unsigned pipe)
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -29,7 +29,8 @@
 #include "nv50/nv50_resource.h"

 uint32_t
-nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz)
+nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
+                                 boolean is_3d)
 {
   uint32_t tile_mode = 0x000;

@@ -41,7 +42,7 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz)
   else
   if (ny >  8) tile_mode = 0x010; /* height 16 tiles */

-   if (nz == 1)
+   if (!is_3d)
      return tile_mode;
   else
      if (tile_mode > 0x020)
@@ -52,14 +53,15 @@ nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz)
   if (nz > 8) return tile_mode | 0x400; /* depth 16 tiles */
   if (nz > 4) return tile_mode | 0x300; /* depth 8 tiles */
   if (nz > 2) return tile_mode | 0x200; /* depth 4 tiles */
+   if (nz > 1) return tile_mode | 0x100; /* depth 2 tiles */

-   return tile_mode | 0x100;
+   return tile_mode;
 }

 static uint32_t
-nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+nv50_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
 {
-   return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz);
+   return nv50_tex_choose_tile_dims_helper(nx, ny * 2, nz, is_3d);
 }

 static uint32_t
@@ -304,7 +306,7 @@ nv50_miptree_init_layout_tiled(struct nv50_miptree *mt)

      lvl->offset = mt->total_size;

-      lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d);
+      lvl->tile_mode = nv50_tex_choose_tile_dims(nbx, nby, d, mt->layout_3d);

      tsx = NV50_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */
      tsy = NV50_TILE_SIZE_Y(lvl->tile_mode);
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -41,7 +41,7 @@ struct nv50_query {
   uint32_t sequence;
   struct nouveau_bo *bo;
   uint32_t base;
-   uint32_t offset; /* base + i * 16 */
+   uint32_t offset; /* base + i * 32 */
   boolean ready;
   boolean flushed;
   boolean is64bit;
@@ -116,8 +116,8 @@ nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index)
   q->type = type;

   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset -= 16;
-      q->data -= 16 / sizeof(*q->data); /* we advance before query_begin ! */
+      q->offset -= 32;
+      q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */
   }

   return (struct pipe_query *)q;
@@ -150,8 +150,8 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
    * initialized it to TRUE.
    */
   if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) {
-      q->offset += 16;
-      q->data += 16 / sizeof(*q->data);
+      q->offset += 32;
+      q->data += 32 / sizeof(*q->data);
      if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE)
         nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE);

--- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
@@ -34,7 +34,8 @@ nv50_screen_init_resource_functions(struct pipe_screen *pscreen);
 #endif /* __NVC0_RESOURCE_H__ */

 uint32_t
-nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz);
+nv50_tex_choose_tile_dims_helper(unsigned nx, unsigned ny, unsigned nz,
+                                 boolean is_3d);

 struct nv50_miptree_level {
   uint32_t offset;
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.h
@@ -25,10 +25,34 @@ struct nv50_context;

 struct nv50_blitter;

+struct nv50_graph_state { /* copied between contexts on switch; nv50_destroy stores the current context's copy in nv50_screen::save_state */
+   uint32_t instance_elts; /* bitmask of per-instance elements */
+   uint32_t instance_base;
+   uint32_t interpolant_ctrl;
+   uint32_t semantic_color;
+   uint32_t semantic_psize;
+   int32_t index_bias;
+   boolean uniform_buffer_bound[3];
+   boolean prim_restart;
+   boolean point_sprite;
+   boolean rt_serialize;
+   boolean flushed;
+   boolean rasterizer_discard;
+   uint8_t tls_required;
+   boolean new_tls_space;
+   uint8_t num_vtxbufs;
+   uint8_t num_vtxelts;
+   uint8_t num_textures[3];
+   uint8_t num_samplers[3];
+   uint8_t prim_size;
+   uint16_t scissor;
+};
+
 struct nv50_screen {
   struct nouveau_screen base;

   struct nv50_context *cur_ctx;
+   struct nv50_graph_state save_state;

   struct nouveau_bo *code;
   struct nouveau_bo *uniforms;
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -394,6 +394,8 @@ nv50_switch_pipe_context(struct nv50_context *ctx_to)

   if (ctx_from)
      ctx_to->state = ctx_from->state;
+   else
+      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty = ~0;
   ctx_to->viewports_dirty = ~0;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -139,8 +139,12 @@ nvc0_destroy(struct pipe_context *pipe)
 {
   struct nvc0_context *nvc0 = nvc0_context(pipe);

-   if (nvc0->screen->cur_ctx == nvc0)
+   if (nvc0->screen->cur_ctx == nvc0) {
      nvc0->screen->cur_ctx = NULL;
+      nvc0->screen->save_state = nvc0->state;
+      nvc0->screen->save_state.tfb = NULL;
+   }
+
   /* Unset bufctx, we don't want to revalidate any resources after the flush.
    * Other contexts will always set their bufctx again on action calls.
    */
@@ -303,6 +307,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
   pipe->get_sample_position = nvc0_context_get_sample_position;

   if (!screen->cur_ctx) {
+      nvc0->state = screen->save_state;
      screen->cur_ctx = nvc0;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
   }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -113,29 +113,7 @@ struct nvc0_context {
   uint32_t dirty;
   uint32_t dirty_cp; /* dirty flags for compute state */

-   struct {
-      boolean flushed;
-      boolean rasterizer_discard;
-      boolean early_z_forced;
-      boolean prim_restart;
-      uint32_t instance_elts; /* bitmask of per-instance elements */
-      uint32_t instance_base;
-      uint32_t constant_vbos;
-      uint32_t constant_elts;
-      int32_t index_bias;
-      uint16_t scissor;
-      uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
-      uint8_t num_vtxbufs;
-      uint8_t num_vtxelts;
-      uint8_t num_textures[6];
-      uint8_t num_samplers[6];
-      uint8_t tls_required; /* bitmask of shader types using l[] */
-      uint8_t c14_bound; /* whether immediate array constbuf is bound */
-      uint8_t clip_enable;
-      uint32_t clip_mode;
-      uint32_t uniform_buffer_bound[5];
-      struct nvc0_transform_feedback_state *tfb;
-   } state;
+   struct nvc0_graph_state state;

   struct nvc0_blend_stateobj *blend;
   struct nvc0_rasterizer_stateobj *rast;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -29,9 +29,9 @@
 #include "nvc0/nvc0_resource.h"

 static uint32_t
-nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, boolean is_3d)
 {
-   return nv50_tex_choose_tile_dims_helper(nx, ny, nz);
+   return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
 }

 static uint32_t
@@ -211,7 +211,7 @@ nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt)

      lvl->offset = mt->total_size;

-      lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d);
+      lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d, mt->layout_3d);

      tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */
      tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -683,11 +683,12 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
   ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
   if (ret) {
      struct nouveau_heap *heap = screen->text_heap;
-      struct nouveau_heap *iter;
-      for (iter = heap; iter && iter->next != heap; iter = iter->next) {
-         struct nvc0_program *evict = iter->priv;
-         if (evict)
-            nouveau_heap_free(&evict->mem);
+      /* Note that the code library, which is allocated before anything else,
+       * does not have a priv pointer. We can stop once we hit it.
+       */
+      while (heap->next && heap->next->priv) {
+         struct nvc0_program *evict = heap->next->priv;
+         nouveau_heap_free(&evict->mem);
      }
      debug_printf("WARNING: out of code space, evicting all shaders.\n");
      ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -27,10 +27,35 @@ struct nvc0_context;

 struct nvc0_blitter;

+struct nvc0_graph_state { /* copied between contexts on switch; nvc0_destroy stores the current context's copy in nvc0_screen::save_state */
+   boolean flushed;
+   boolean rasterizer_discard;
+   boolean early_z_forced;
+   boolean prim_restart;
+   uint32_t instance_elts; /* bitmask of per-instance elements */
+   uint32_t instance_base;
+   uint32_t constant_vbos;
+   uint32_t constant_elts;
+   int32_t index_bias;
+   uint16_t scissor;
+   uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
+   uint8_t num_vtxbufs;
+   uint8_t num_vtxelts;
+   uint8_t num_textures[6];
+   uint8_t num_samplers[6];
+   uint8_t tls_required; /* bitmask of shader types using l[] */
+   uint8_t c14_bound; /* whether immediate array constbuf is bound */
+   uint8_t clip_enable;
+   uint32_t clip_mode;
+   uint32_t uniform_buffer_bound[5];
+   struct nvc0_transform_feedback_state *tfb; /* nvc0_destroy resets this to NULL in the saved copy */
+};
+
 struct nvc0_screen {
   struct nouveau_screen base;

   struct nvc0_context *cur_ctx;
+   struct nvc0_graph_state save_state;

   int num_occlusion_queries_active;

--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -543,6 +543,8 @@ nvc0_switch_pipe_context(struct nvc0_context *ctx_to)

   if (ctx_from)
      ctx_to->state = ctx_from->state;
+   else
+      ctx_to->state = ctx_to->screen->save_state;

   ctx_to->dirty = ~0;
   ctx_to->viewports_dirty = ~0;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1152,6 +1152,12 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
                      NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 |
                      NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST);
   }
+   if (nvc0->state.instance_elts) {
+      nvc0->state.instance_elts = 0;
+      BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+      PUSH_DATA (push, n);
+      PUSH_DATA (push, 0);
+   }
   nvc0->state.num_vtxelts = 2;

   for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
--- a/src/gallium/drivers/r300/Android.mk
+++ b/src/gallium/drivers/r300/Android.mk
@@ -33,9 +33,10 @@ LOCAL_SRC_FILES := $(C_SOURCES)
 LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/glsl \
-	$(MESA_TOP)/src/mesa \
-	$(TARGET_OUT_HEADERS)/libdrm
+	$(MESA_TOP)/src/mesa

+
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_r300

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/drivers/r300/Automake.inc
+++ b/src/gallium/drivers/r300/Automake.inc
@@ -5,7 +5,7 @@ TARGET_CPPFLAGS += -DGALLIUM_R300
 TARGET_LIB_DEPS += \
 	$(top_builddir)/src/gallium/drivers/r300/libr300.la \
 	$(RADEON_LIBS) \
-	$(INTEL_LIBS)
+	$(LIBDRM_LIBS)

 TARGET_RADEON_WINSYS = \
 	$(top_builddir)/src/gallium/winsys/radeon/drm/libradeonwinsys.la
--- a/src/gallium/drivers/r600/Android.mk
+++ b/src/gallium/drivers/r600/Android.mk
@@ -30,8 +30,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES) $(CXX_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_r600

 include external/stlport/libstlport.mk
--- a/src/gallium/drivers/radeon/Android.mk
+++ b/src/gallium/drivers/radeon/Android.mk
@@ -30,8 +30,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_radeon

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/drivers/radeonsi/Android.mk
+++ b/src/gallium/drivers/radeonsi/Android.mk
@@ -30,8 +30,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_pipe_radeonsi

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -130,7 +130,8 @@ static void *si_create_compute_state(
 	        for (i = 0; i < program->num_kernels; i++) {
 		        LLVMModuleRef mod = radeon_llvm_get_kernel_module(program->llvm_ctx, i,
                                                        code, header->num_bytes);
-			si_compile_llvm(sctx->screen, &program->kernels[i], mod);
+			si_compile_llvm(sctx->screen, &program->kernels[i], sctx->tm,
+					mod);
 			LLVMDisposeModule(mod);
 		}
 	}
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -69,6 +69,11 @@ static void si_destroy_context(struct pipe_context *context)
 	si_pm4_cleanup(sctx);

 	r600_common_context_cleanup(&sctx->b);
+
+#if HAVE_LLVM >= 0x0306
+	LLVMDisposeTargetMachine(sctx->tm);
+#endif
+
 	FREE(sctx);
 }

@@ -77,6 +82,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 	struct si_context *sctx = CALLOC_STRUCT(si_context);
 	struct si_screen* sscreen = (struct si_screen *)screen;
 	struct radeon_winsys *ws = sscreen->b.ws;
+	LLVMTargetRef r600_target;
+#if HAVE_LLVM >= 0x0306
+	const char *triple = "amdgcn--";
+#else
+	const char *triple = "r600--";
+#endif
 	int shader, i;

 	if (sctx == NULL)
@@ -167,6 +178,17 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 	 */
 	sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;

+#if HAVE_LLVM >= 0x0306
+	/* Initialize LLVM TargetMachine */
+	r600_target = radeon_llvm_get_r600_target(triple);
+	sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
+					   r600_get_llvm_processor_name(sscreen->b.family),
+					   "+DumpCode,+vgpr-spilling",
+					   LLVMCodeGenLevelDefault,
+					   LLVMRelocDefault,
+					   LLVMCodeModelDefault);
+#endif
+
 	return &sctx->b.b;
 fail:
 	si_destroy_context(&sctx->b.b);
@@ -435,12 +457,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 	if (!sscreen->b.ws->unref(sscreen->b.ws))
 		return;

-#if HAVE_LLVM >= 0x0306
-	// r600_destroy_common_screen() frees sscreen, so we need to make
-	// sure to dispose the TargetMachine before we call it.
-	LLVMDisposeTargetMachine(sscreen->tm);
-#endif
-
 	r600_destroy_common_screen(&sscreen->b);
 }

@@ -498,12 +514,7 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen)
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 {
 	struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
-	LLVMTargetRef r600_target;
-#if HAVE_LLVM >= 0x0306
-	const char *triple = "amdgcn--";
-#else
-	const char *triple = "r600--";
-#endif
+
 	if (sscreen == NULL) {
 		return NULL;
 	}
@@ -531,13 +542,5 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 	/* Create the auxiliary context. This must be done last. */
 	sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, NULL);

-#if HAVE_LLVM >= 0x0306
-	/* Initialize LLVM TargetMachine */
-	r600_target = radeon_llvm_get_r600_target(triple);
-	sscreen->tm = LLVMCreateTargetMachine(r600_target, triple,
-				r600_get_llvm_processor_name(sscreen->b.family),
-				"+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault,
-				LLVMCodeModelDefault);
-#endif
 	return &sscreen->b.b;
 }
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -83,7 +83,6 @@ struct si_compute;

 struct si_screen {
 	struct r600_common_screen	b;
-	LLVMTargetMachineRef		tm;
 };

 struct si_sampler_view {
@@ -200,6 +199,8 @@ struct si_context {
 	struct pipe_resource	*esgs_ring;
 	struct pipe_resource	*gsvs_ring;

+	LLVMTargetMachineRef		tm;
+
 	/* SI state handling */
 	union si_state	queued;
 	union si_state	emitted;
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -72,6 +72,7 @@ struct si_shader_context
 	int param_streamout_offset[4];
 	int param_vertex_id;
 	int param_instance_id;
+	LLVMTargetMachineRef tm;
 	LLVMValueRef const_md;
 	LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
 	LLVMValueRef ddxy_lds;
@@ -2638,13 +2639,13 @@ int si_shader_binary_read(struct si_screen *sscreen,
 }

 int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-							LLVMModuleRef mod)
+		    LLVMTargetMachineRef tm, LLVMModuleRef mod)
 {
 	int r = 0;
 	bool dump = r600_can_dump_shader(&sscreen->b,
 			shader->selector ? shader->selector->tokens : NULL);
 	r = radeon_llvm_compile(mod, &shader->binary,
-		r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
+		r600_get_llvm_processor_name(sscreen->b.family), dump, tm);

 	if (r) {
 		return r;
@@ -2732,7 +2733,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
 		fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");

 	r = si_compile_llvm(sscreen, si_shader_ctx->shader,
-			    bld_base->base.gallivm->module);
+			    si_shader_ctx->tm, bld_base->base.gallivm->module);

 	radeon_llvm_dispose(&si_shader_ctx->radeon_bld);

@@ -2740,7 +2741,8 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
 	return r;
 }

-int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+		     struct si_shader *shader)
 {
 	struct si_shader_selector *sel = shader->selector;
 	struct tgsi_token *tokens = sel->tokens;
@@ -2812,6 +2814,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
 	si_shader_ctx.shader = shader;
 	si_shader_ctx.type = tgsi_get_processor_type(tokens);
 	si_shader_ctx.screen = sscreen;
+	si_shader_ctx.tm = tm;

 	switch (si_shader_ctx.type) {
 	case TGSI_PROCESSOR_VERTEX:
@@ -2867,7 +2870,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
 	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

 	mod = bld_base->base.gallivm->module;
-	r = si_compile_llvm(sscreen, shader, mod);
+	r = si_compile_llvm(sscreen, shader, tm, mod);
 	if (r) {
 		fprintf(stderr, "LLVM failed to compile shader\n");
 		goto out;
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -181,9 +181,10 @@ static inline struct si_shader* si_get_vs_state(struct si_context *sctx)
 }

 /* radeonsi_shader.c */
-int si_shader_create(struct si_screen *sscreen, struct si_shader *shader);
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+		     struct si_shader *shader);
 int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-		    LLVMModuleRef mod);
+		    LLVMTargetMachineRef tm, LLVMModuleRef mod);
 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -398,6 +398,7 @@ static INLINE void si_shader_selector_key(struct pipe_context *ctx,
 static int si_shader_select(struct pipe_context *ctx,
 			    struct si_shader_selector *sel)
 {
+	struct si_context *sctx = (struct si_context *)ctx;
 	union si_shader_key key;
 	struct si_shader * shader = NULL;
 	int r;
@@ -437,7 +438,8 @@ static int si_shader_select(struct pipe_context *ctx,

 		shader->next_variant = sel->current;
 		sel->current = shader;
-		r = si_shader_create((struct si_screen*)ctx->screen, shader);
+		r = si_shader_create((struct si_screen*)ctx->screen, sctx->tm,
+				     shader);
 		if (unlikely(r)) {
 			R600_ERR("Failed to build shader variant (type=%u) %d\n",
 				 sel->type, r);
--- a/src/gallium/state_trackers/clover/core/event.cpp
+++ b/src/gallium/state_trackers/clover/core/event.cpp
@@ -27,7 +27,7 @@ using namespace clover;

 event::event(clover::context &ctx, const ref_vector<event> &deps,
             action action_ok, action action_fail) :
-   context(ctx), _status(0), wait_count(1),
+   context(ctx), wait_count(1), _status(0),
   action_ok(action_ok), action_fail(action_fail) {
   for (auto &ev : deps)
      ev.chain(*this);
@@ -36,36 +36,69 @@ event::event(clover::context &ctx, const ref_vector<event> &deps,
 event::~event() {
 }

+std::vector<intrusive_ref<event>>
+event::trigger_self() {
+   std::lock_guard<std::mutex> lock(mutex);
+   std::vector<intrusive_ref<event>> evs;
+
+   if (!--wait_count)
+      std::swap(_chain, evs);
+
+   return evs;
+}
+
 void
 event::trigger() {
-   if (!--wait_count) {
-      action_ok(*this);
+   auto evs = trigger_self();

-      while (!_chain.empty()) {
-         _chain.back()().trigger();
-         _chain.pop_back();
-      }
+   if (signalled()) {
+      action_ok(*this);
+      cv.notify_all();
   }
+
+   for (event &ev : evs)
+      ev.trigger();
+}
+
+std::vector<intrusive_ref<event>>
+event::abort_self(cl_int status) {
+   std::lock_guard<std::mutex> lock(mutex);
+   std::vector<intrusive_ref<event>> evs;
+
+   _status = status;
+   std::swap(_chain, evs);
+
+   return evs;
 }

 void
 event::abort(cl_int status) {
-   _status = status;
+   auto evs = abort_self(status);
+
   action_fail(*this);

-   while (!_chain.empty()) {
-      _chain.back()().abort(status);
-      _chain.pop_back();
-   }
+   for (event &ev : evs)
+      ev.abort(status);
 }

 bool
 event::signalled() const {
+   std::lock_guard<std::mutex> lock(mutex);
   return !wait_count;
 }

+cl_int
+event::status() const {
+   std::lock_guard<std::mutex> lock(mutex);
+   return _status;
+}
+
 void
 event::chain(event &ev) {
+   std::unique_lock<std::mutex> lock(mutex, std::defer_lock);
+   std::unique_lock<std::mutex> lock_ev(ev.mutex, std::defer_lock);
+   std::lock(lock, lock_ev);
+
   if (wait_count) {
      ev.wait_count++;
      _chain.push_back(ev);
@@ -73,6 +106,15 @@ event::chain(event &ev) {
   ev.deps.push_back(*this);
 }

+void
+event::wait() const {
+   for (event &ev : deps)
+      ev.wait();
+
+   std::unique_lock<std::mutex> lock(mutex);
+   cv.wait(lock, [=]{ return !wait_count; });
+}
+
 hard_event::hard_event(command_queue &q, cl_command_type command,
                       const ref_vector<event> &deps, action action) :
   event(q.context(), deps, profile(q, action), [](event &ev){}),
@@ -93,8 +135,8 @@ cl_int
 hard_event::status() const {
   pipe_screen *screen = queue()->device().pipe;

-   if (_status < 0)
-      return _status;
+   if (event::status() < 0)
+      return event::status();

   else if (!_fence)
      return CL_QUEUED;
@@ -120,6 +162,8 @@ void
 hard_event::wait() const {
   pipe_screen *screen = queue()->device().pipe;

+   event::wait();
+
   if (status() == CL_QUEUED)
      queue()->flush();

@@ -182,8 +226,8 @@ soft_event::soft_event(clover::context &ctx, const ref_vector<event> &deps,

 cl_int
 soft_event::status() const {
-   if (_status < 0)
-      return _status;
+   if (event::status() < 0)
+      return event::status();

   else if (!signalled() ||
            any_of([](const event &ev) {
@@ -207,8 +251,7 @@ soft_event::command() const {

 void
 soft_event::wait() const {
-   for (event &ev : deps)
-      ev.wait();
+   event::wait();

   if (status() != CL_COMPLETE)
      throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
--- a/src/gallium/state_trackers/clover/core/event.hpp
+++ b/src/gallium/state_trackers/clover/core/event.hpp
@@ -23,6 +23,7 @@
 #ifndef CLOVER_CORE_EVENT_HPP
 #define CLOVER_CORE_EVENT_HPP

+#include <condition_variable>
 #include <functional>

 #include "core/object.hpp"
@@ -65,24 +66,29 @@ namespace clover {
      void abort(cl_int status);
      bool signalled() const;

-      virtual cl_int status() const = 0;
+      virtual cl_int status() const;
      virtual command_queue *queue() const = 0;
      virtual cl_command_type command() const = 0;
-      virtual void wait() const = 0;
+      virtual void wait() const;

      const intrusive_ref<clover::context> context;

   protected:
      void chain(event &ev);

-      cl_int _status;
      std::vector<intrusive_ref<event>> deps;

   private:
+      std::vector<intrusive_ref<event>> trigger_self();
+      std::vector<intrusive_ref<event>> abort_self(cl_int status);
+
      unsigned wait_count;
+      cl_int _status;
      action action_ok;
      action action_fail;
      std::vector<intrusive_ref<event>> _chain;
+      mutable std::condition_variable cv;
+      mutable std::mutex mutex;
   };

   ///
--- a/src/gallium/state_trackers/clover/core/program.cpp
+++ b/src/gallium/state_trackers/clover/core/program.cpp
@@ -90,6 +90,8 @@ cl_build_status
 program::build_status(const device &dev) const {
   if (_binaries.count(&dev))
      return CL_BUILD_SUCCESS;
+   else if (_logs.count(&dev))
+      return CL_BUILD_ERROR;
   else
      return CL_BUILD_NONE;
 }
--- a/src/gallium/state_trackers/clover/core/queue.cpp
+++ b/src/gallium/state_trackers/clover/core/queue.cpp
@@ -44,6 +44,7 @@ command_queue::flush() {
   pipe_screen *screen = device().pipe;
   pipe_fence_handle *fence = NULL;

+   std::lock_guard<std::mutex> lock(queued_events_mutex);
   if (!queued_events.empty()) {
      pipe->flush(pipe, &fence, 0);

@@ -69,6 +70,7 @@ command_queue::profiling_enabled() const {

 void
 command_queue::sequence(hard_event &ev) {
+   std::lock_guard<std::mutex> lock(queued_events_mutex);
   if (!queued_events.empty())
      queued_events.back()().chain(ev);

--- a/src/gallium/state_trackers/clover/core/queue.hpp
+++ b/src/gallium/state_trackers/clover/core/queue.hpp
@@ -24,6 +24,7 @@
 #define CLOVER_CORE_QUEUE_HPP

 #include <deque>
+#include <mutex>

 #include "core/object.hpp"
 #include "core/context.hpp"
@@ -69,6 +70,7 @@ namespace clover {

      cl_command_queue_properties props;
      pipe_context *pipe;
+      std::mutex queued_events_mutex;
      std::deque<intrusive_ref<hard_event>> queued_events;
   };
 }
--- a/src/gallium/targets/osmesa/Makefile.am
+++ b/src/gallium/targets/osmesa/Makefile.am
@@ -42,7 +42,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp
 lib@OSMESA_LIB@_la_SOURCES = target.c

 lib@OSMESA_LIB@_la_LDFLAGS = \
-	-module \
 	-no-undefined \
 	-version-number @OSMESA_VERSION@ \
 	$(GC_SECTIONS) \
--- a/src/gallium/winsys/freedreno/drm/Android.mk
+++ b/src/gallium/winsys/freedreno/drm/Android.mk
@@ -27,10 +27,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := \
-	$(TARGET_OUT_HEADERS)/libdrm \
-	$(TARGET_OUT_HEADERS)/freedreno
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_freedreno
 LOCAL_MODULE := libmesa_winsys_freedreno

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/winsys/i915/drm/Android.mk
+++ b/src/gallium/winsys/i915/drm/Android.mk
@@ -30,8 +30,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_intel
 LOCAL_MODULE := libmesa_winsys_i915

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/winsys/intel/drm/Android.mk
+++ b/src/gallium/winsys/intel/drm/Android.mk
@@ -29,8 +29,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_intel
 LOCAL_MODULE := libmesa_winsys_intel

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/winsys/nouveau/drm/Android.mk
+++ b/src/gallium/winsys/nouveau/drm/Android.mk
@@ -30,8 +30,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_nouveau
 LOCAL_MODULE := libmesa_winsys_nouveau

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/winsys/radeon/drm/Android.mk
+++ b/src/gallium/winsys/radeon/drm/Android.mk
@@ -30,8 +30,7 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := $(C_SOURCES)

-LOCAL_C_INCLUDES := $(TARGET_OUT_HEADERS)/libdrm
-
+LOCAL_SHARED_LIBRARIES := libdrm libdrm_radeon
 LOCAL_MODULE := libmesa_winsys_radeon

 include $(GALLIUM_COMMON_MK)
--- a/src/gallium/winsys/svga/drm/Android.mk
+++ b/src/gallium/winsys/svga/drm/Android.mk
@@ -34,9 +34,9 @@ LOCAL_CFLAGS := -D_FILE_OFFSET_BITS=64

 LOCAL_C_INCLUDES := \
 	$(GALLIUM_TOP)/drivers/svga \
-	$(GALLIUM_TOP)/drivers/svga/include \
-	$(TARGET_OUT_HEADERS)/libdrm
+	$(GALLIUM_TOP)/drivers/svga/include

+LOCAL_SHARED_LIBRARIES := libdrm
 LOCAL_MODULE := libmesa_winsys_svga

 include $(GALLIUM_COMMON_MK)
--- a/src/glsl/Android.gen.mk
+++ b/src/glsl/Android.gen.mk
@@ -33,11 +33,23 @@ sources := \
 	glsl_lexer.cpp \
 	glsl_parser.cpp \
 	glcpp/glcpp-lex.c \
-	glcpp/glcpp-parse.c
+	glcpp/glcpp-parse.c \
+	nir/nir_constant_expressions.c \
+	nir/nir_opcodes.c \
+	nir/nir_opcodes.h \
+	nir/nir_opt_algebraic.c

 LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))

-LOCAL_C_INCLUDES += $(intermediates) $(intermediates)/glcpp $(MESA_TOP)/src/glsl/glcpp
+LOCAL_C_INCLUDES += \
+	$(intermediates) \
+	$(intermediates)/glcpp \
+	$(intermediates)/nir \
+	$(MESA_TOP)/src/glsl/glcpp \
+	$(MESA_TOP)/src/glsl/nir
+
+LOCAL_EXPORT_C_INCLUDE_DIRS += \
+	$(intermediates)/nir

 sources := $(addprefix $(intermediates)/, $(sources))
 LOCAL_GENERATED_SOURCES += $(sources)
@@ -77,3 +89,42 @@ $(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l

 $(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
 	$(call glsl_local-y-to-c-and-h)
+
+nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
+nir_constant_expressions_deps := \
+	$(LOCAL_PATH)/nir/nir_opcodes.py \
+	$(LOCAL_PATH)/nir/nir_constant_expressions.py \
+	$(LOCAL_PATH)/nir/nir_constant_expressions.h
+
+$(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
+	@mkdir -p $(dir $@)
+	$(hide) $(MESA_PYTHON2) $(nir_constant_expressions_gen) $< > $@
+
+nir_opcodes_h_gen := $(LOCAL_PATH)/nir/nir_opcodes_h.py
+nir_opcodes_h_deps := \
+	$(LOCAL_PATH)/nir/nir_opcodes.py \
+	$(LOCAL_PATH)/nir/nir_opcodes_h.py
+
+$(intermediates)/nir/nir_opcodes.h: $(nir_opcodes_h_deps)
+	@mkdir -p $(dir $@)
+	$(hide) $(MESA_PYTHON2) $(nir_opcodes_h_gen) $< > $@
+
+$(LOCAL_PATH)/nir/nir.h: $(intermediates)/nir/nir_opcodes.h
+
+nir_opcodes_c_gen := $(LOCAL_PATH)/nir/nir_opcodes_c.py
+nir_opcodes_c_deps := \
+	$(LOCAL_PATH)/nir/nir_opcodes.py \
+	$(LOCAL_PATH)/nir/nir_opcodes_c.py
+
+$(intermediates)/nir/nir_opcodes.c: $(nir_opcodes_c_deps)
+	@mkdir -p $(dir $@)
+	$(hide) $(MESA_PYTHON2) $(nir_opcodes_c_gen) $< > $@
+
+nir_opt_algebraic_gen := $(LOCAL_PATH)/nir/nir_opt_algebraic.py
+nir_opt_algebraic_deps := \
+	$(LOCAL_PATH)/nir/nir_opt_algebraic.py \
+	$(LOCAL_PATH)/nir/nir_algebraic.py
+
+$(intermediates)/nir/nir_opt_algebraic.c: $(nir_opt_algebraic_deps)
+	@mkdir -p $(dir $@)
+	$(hide) $(MESA_PYTHON2) $(nir_opt_algebraic_gen) $< > $@
--- a/src/glsl/Android.mk
+++ b/src/glsl/Android.mk
@@ -35,10 +35,10 @@ include $(CLEAR_VARS)

 LOCAL_SRC_FILES := \
 	$(LIBGLCPP_FILES) \
-	$(LIBGLSL_FILES)
+	$(LIBGLSL_FILES) \
+	$(NIR_FILES)

 LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/mesa

@@ -59,7 +59,6 @@ LOCAL_SRC_FILES := \
 	$(GLSL_COMPILER_CXX_FILES)

 LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/mesa

--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -690,24 +690,27 @@ glsl_type::record_key_compare(const void *a, const void *b)
 }


+/**
+ * Generate an integer hash value for a glsl_type structure type.
+ */
 unsigned
 glsl_type::record_key_hash(const void *a)
 {
   const glsl_type *const key = (glsl_type *) a;
-   char hash_key[128];
-   unsigned size = 0;
-
-   size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
+   uintptr_t hash = key->length;
+   unsigned retval;

   for (unsigned i = 0; i < key->length; i++) {
-      if (size >= sizeof(hash_key))
-	 break;
-
-      size += snprintf(& hash_key[size], sizeof(hash_key) - size,
-		       "%p", (void *) key->fields.structure[i].type);
+      /* casting pointer to uintptr_t */
+      hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type;
   }

-   return hash_table_string_hash(& hash_key);
+   if (sizeof(hash) == 8)
+      retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32);
+   else
+      retval = hash;
+
+   return retval;
 }


--- a/src/glsl/nir/nir_opt_algebraic.py
+++ b/src/glsl/nir/nir_opt_algebraic.py
@@ -101,7 +101,7 @@ optimizations = [
   (('ishr', 0, a), 0),
   (('ishr', a, 0), a),
   (('ushr', 0, a), 0),
-   (('ushr', a, 0), 0),
+   (('ushr', a, 0), a),
   # Exponential/logarithmic identities
   (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
   (('fexp',  ('flog',  a)), a), # e^ln(a)  = a
--- a/src/glx/dri3_glx.c
+++ b/src/glx/dri3_glx.c
@@ -1985,6 +1985,11 @@ dri3_create_screen(int screen, struct glx_display * priv)
      goto handle_error;
   }

+   if (psc->is_different_gpu && !psc->image->blitImage) {
+      ErrorMessageF("Different GPU, but blitImage not implemented for this driver\n");
+      goto handle_error;
+   }
+
   if (!psc->is_different_gpu && (
       !psc->texBuffer || psc->texBuffer->base.version < 2 ||
       !psc->texBuffer->setTexBuffer2
--- a/src/loader/Android.mk
+++ b/src/loader/Android.mk
@@ -37,7 +37,7 @@ LOCAL_SRC_FILES := \
 ifeq ($(MESA_GPU_DRIVERS),swrast)
 LOCAL_CFLAGS += -D__NOT_HAVE_DRM_H
 else
-LOCAL_C_INCLUDES += $(TARGET_OUT_HEADERS)/libdrm
+LOCAL_SHARED_LIBRARIES := libdrm
 endif

 LOCAL_MODULE := libmesa_loader
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -231,7 +231,7 @@ es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
 	$(call glapi_gen_mapi,$<,es2api)

 # XXX: Inline vgapi's Makefile.am here.
-EXTRA_DIST += vgapi
+EXTRA_DIST += vgapi mapi.c mapi.h
 # if HAVE_OPENVG
 # SUBDIRS += vgapi
 # endif
--- a/src/mesa/Android.gen.mk
+++ b/src/mesa/Android.gen.mk
@@ -34,6 +34,9 @@ sources := \
 	main/enums.c \
 	main/api_exec.c \
 	main/dispatch.h \
+	main/format_pack.c \
+	main/format_unpack.c \
+	main/format_info.h \
 	main/remap_helper.h \
 	main/get_hash.h

@@ -124,3 +127,21 @@ format_info_deps := \

 $(intermediates)/main/format_info.h: $(format_info_deps)
 	@$(MESA_PYTHON2) $(FORMAT_INFO) $< > $@
+
+FORMAT_PACK := $(LOCAL_PATH)/main/format_pack.py
+format_pack_deps := \
+	$(LOCAL_PATH)/main/formats.csv \
+	$(LOCAL_PATH)/main/format_parser.py \
+	$(FORMAT_PACK)
+
+$(intermediates)/main/format_pack.c: $(format_pack_deps)
+	$(hide) $(MESA_PYTHON2) $(FORMAT_PACK) $< > $@
+
+FORMAT_UNPACK := $(LOCAL_PATH)/main/format_unpack.py
+format_unpack_deps := \
+	$(LOCAL_PATH)/main/formats.csv \
+	$(LOCAL_PATH)/main/format_parser.py \
+	$(FORMAT_UNPACK)
+
+$(intermediates)/main/format_unpack.c: $(format_unpack_deps)
+	$(hide) $(MESA_PYTHON2) $(FORMAT_UNPACK) $< > $@
--- a/src/mesa/Android.libmesa_dricore.mk
+++ b/src/mesa/Android.libmesa_dricore.mk
@@ -49,8 +49,8 @@ endif # MESA_ENABLE_ASM

 ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
 LOCAL_SRC_FILES += \
-	$(SRCDIR)main/streaming-load-memcpy.c \
-	$(SRCDIR)main/sse_minmax.c
+	main/streaming-load-memcpy.c \
+	main/sse_minmax.c
 LOCAL_CFLAGS := -msse4.1
 endif

@@ -61,8 +61,8 @@ endif

 LOCAL_C_INCLUDES := \
 	$(call intermediates-dir-for STATIC_LIBRARIES,libmesa_program,,) \
-	$(MESA_TOP)/src \
 	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa/main \
 	$(MESA_TOP)/src/glsl \
 	$(MESA_TOP)/src/gallium/auxiliary

--- a/src/mesa/Android.libmesa_glsl_utils.mk
+++ b/src/mesa/Android.libmesa_glsl_utils.mk
@@ -36,7 +36,6 @@ include $(CLEAR_VARS)
 LOCAL_MODULE := libmesa_glsl_utils

 LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src \
 	$(MESA_TOP)/src/glsl \
 	$(MESA_TOP)/src/mapi

@@ -59,7 +58,6 @@ LOCAL_IS_HOST_MODULE := true
 LOCAL_CFLAGS := -D_POSIX_C_SOURCE=199309L

 LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src \
 	$(MESA_TOP)/src/glsl \
 	$(MESA_TOP)/src/mapi

--- a/src/mesa/Android.libmesa_st_mesa.mk
+++ b/src/mesa/Android.libmesa_st_mesa.mk
@@ -53,11 +53,11 @@ endif

 LOCAL_C_INCLUDES := \
 	$(call intermediates-dir-for STATIC_LIBRARIES,libmesa_program,,) \
-	$(MESA_TOP)/src/gallium/auxiliary \
-	$(MESA_TOP)/src/gallium/include \
-	$(MESA_TOP)/src \
+	$(MESA_TOP)/src/mapi \
+	$(MESA_TOP)/src/mesa/main \
 	$(MESA_TOP)/src/glsl \
-	$(MESA_TOP)/src/mapi
+	$(MESA_TOP)/src/gallium/auxiliary \
+	$(MESA_TOP)/src/gallium/include

 LOCAL_WHOLE_STATIC_LIBRARIES := \
 	libmesa_program
--- a/src/mesa/drivers/dri/Android.mk
+++ b/src/mesa/drivers/dri/Android.mk
@@ -35,17 +35,16 @@ MESA_DRI_CFLAGS := \
 	-DHAVE_ANDROID_PLATFORM

 MESA_DRI_C_INCLUDES := \
-	$(MESA_TOP)/src \
 	$(call intermediates-dir-for,STATIC_LIBRARIES,libmesa_dri_common) \
 	$(addprefix $(MESA_TOP)/, $(mesa_dri_common_INCLUDES)) \
-	$(TARGET_OUT_HEADERS)/libdrm \
 	external/expat/lib

 MESA_DRI_WHOLE_STATIC_LIBRARIES := \
 	libmesa_glsl \
 	libmesa_megadriver_stub \
 	libmesa_dri_common \
-	libmesa_dricore
+	libmesa_dricore \
+	libmesa_util

 MESA_DRI_SHARED_LIBRARIES := \
 	libcutils \
--- a/src/mesa/drivers/dri/common/Android.mk
+++ b/src/mesa/drivers/dri/common/Android.mk
@@ -40,6 +40,13 @@ LOCAL_C_INCLUDES := \
    $(intermediates) \
    $(MESA_DRI_C_INCLUDES)

+# swrast only
+ifeq ($(MESA_GPU_DRIVERS),swrast)
+LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H
+else
+LOCAL_SHARED_LIBRARIES := libdrm
+endif
+
 LOCAL_SRC_FILES := $(DRI_COMMON_FILES)

 LOCAL_GENERATED_SOURCES := \
@@ -99,6 +106,13 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 LOCAL_C_INCLUDES := \
    $(MESA_DRI_C_INCLUDES)

+# swrast only
+ifeq ($(MESA_GPU_DRIVERS),swrast)
+LOCAL_CFLAGS := -D__NOT_HAVE_DRM_H
+else
+LOCAL_SHARED_LIBRARIES := libdrm
+endif
+
 LOCAL_SRC_FILES := $(megadriver_stub_FILES)

 include $(MESA_COMMON_MK)
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -91,5 +91,9 @@ TODO: document the other workarounds.
        <application name="Dead Island" executable="DeadIslandGame">
            <option name="allow_glsl_extension_directive_midshader" value="true" />
        </application>
+
+        <application name="Second Life" executable="do-not-directly-run-secondlife-bin">
+            <option name="allow_glsl_extension_directive_midshader" value="true" />
+        </application>
    </device>
 </driconf>
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
@@ -52,7 +52,14 @@ POT=xmlpool.pot

 .PHONY: all clean pot po mo

-EXTRA_DIST = gen_xmlpool.py options.h t_options.h $(POS) SConscript
+EXTRA_DIST = \
+	gen_xmlpool.py \
+	options.h \
+	t_options.h \
+	$(POS) \
+	$(MOS) \
+	SConscript
+
 BUILT_SOURCES = options.h
 CLEANFILES = $(MOS) options.h

--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -241,7 +241,7 @@ static const struct brw_device_info brw_device_info_chv = {
   .max_gs_threads = 80,
   .max_wm_threads = 128,
   .urb = {
-      .size = 128,
+      .size = 192,
      .min_vs_entries = 34,
      .max_vs_entries = 640,
      .max_gs_entries = 256,
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1683,6 +1683,15 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst,
      offset_value.file != BAD_FILE && offset_value.file != IMM;
   bool coordinate_done = false;

+   /* The sampler can only meaningfully compute LOD for fragment shader
+    * messages. For all other stages, we change the opcode to ir_txl and
+    * hardcode the LOD to 0.
+    */
+   if (stage != MESA_SHADER_FRAGMENT && op == ir_tex) {
+      op = ir_txl;
+      lod = fs_reg(0.0f);
+   }
+
   /* Set up the LOD info */
   switch (op) {
   case ir_tex:
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -476,6 +476,8 @@ brw_query_counter(struct gl_context *ctx, struct gl_query_object *q)
   drm_intel_bo_unreference(query->bo);
   query->bo = drm_intel_bo_alloc(brw->bufmgr, "timestamp query", 4096, 4096);
   brw_write_timestamp(brw, query->bo, 0);
+
+   query->flushed = false;
 }

 /**
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -476,6 +476,19 @@ vec4_gs_visitor::visit(ir_emit_vertex *ir)
 {
   this->current_annotation = "emit vertex: safety check";

+   /* Haswell and later hardware ignores the "Render Stream Select" bits
+    * from the 3DSTATE_STREAMOUT packet when the SOL stage is disabled,
+    * and instead sends all primitives down the pipeline for rasterization.
+    * If the SOL stage is enabled, "Render Stream Select" is honored and
+    * primitives bound to non-zero streams are discarded after stream output.
+    *
+    * Since the only purpose of primitives sent to non-zero streams is to
+    * be recorded by transform feedback, we can simply discard all geometry
+    * bound to these streams when transform feedback is disabled.
+    */
+   if (ir->stream_id() > 0 && shader_prog->TransformFeedback.NumVarying == 0)
+      return;
+
   /* To ensure that we don't output more vertices than the shader specified
    * using max_vertices, do the logic inside a conditional of the form "if
    * (vertex_count < MAX)"
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -82,6 +82,8 @@ gen8_upload_gs_state(struct brw_context *brw)
      uint32_t dw7 = (brw->gs.prog_data->control_data_header_size_hwords <<
                      GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
                      brw->gs.prog_data->dispatch_mode |
+                     ((brw->gs.prog_data->invocations - 1) <<
+                      GEN7_GS_INSTANCE_CONTROL_SHIFT) |
                      GEN6_GS_STATISTICS_ENABLE |
                      (brw->gs.prog_data->include_primitive_id ?
                       GEN7_GS_INCLUDE_PRIMITIVE_ID : 0) |
--- a/src/mesa/drivers/dri/i965/gen8_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sol_state.c
@@ -128,6 +128,15 @@ gen8_upload_3dstate_streamout(struct brw_context *brw, bool active,
      dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT;
      dw2 |= (urb_entry_read_length - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;

+      dw2 |= urb_entry_read_offset << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT;
+      dw2 |= (urb_entry_read_length - 1) << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT;
+
+      dw2 |= urb_entry_read_offset << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT;
+      dw2 |= (urb_entry_read_length - 1) << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT;
+
+      dw2 |= urb_entry_read_offset << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT;
+      dw2 |= (urb_entry_read_length - 1) << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT;
+
      /* Set buffer pitches; 0 means unbound. */
      if (xfb_obj->Buffers[0])
         dw3 |= linked_xfb_info->BufferStride[0] * 4;
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -271,6 +271,20 @@ intel_miptree_blit(struct brw_context *brw,
   return true;
 }

+static bool
+alignment_valid(struct brw_context *brw, unsigned offset, uint32_t tiling)
+{
+   /* Tiled buffers must be page-aligned (4K). */
+   if (tiling != I915_TILING_NONE)
+      return (offset & 4095) == 0;
+
+   /* On Gen8+, linear buffers must be cacheline-aligned. */
+   if (brw->gen >= 8)
+      return (offset & 63) == 0;
+
+   return true;
+}
+
 /* Copy BitBlt
 */
 bool
@@ -296,14 +310,11 @@ intelEmitCopyBlit(struct brw_context *brw,
   bool dst_y_tiled = dst_tiling == I915_TILING_Y;
   bool src_y_tiled = src_tiling == I915_TILING_Y;

-   if (dst_tiling != I915_TILING_NONE) {
-      if (dst_offset & 4095)
-	 return false;
-   }
-   if (src_tiling != I915_TILING_NONE) {
-      if (src_offset & 4095)
-	 return false;
-   }
+   if (!alignment_valid(brw, dst_offset, dst_tiling))
+      return false;
+   if (!alignment_valid(brw, src_offset, src_tiling))
+      return false;
+
   if ((dst_y_tiled || src_y_tiled) && brw->gen < 6)
      return false;

@@ -524,6 +535,7 @@ intel_emit_linear_blit(struct brw_context *brw,
 {
   struct gl_context *ctx = &brw->ctx;
   GLuint pitch, height;
+   int16_t src_x, dst_x;
   bool ok;

   /* The pitch given to the GPU must be DWORD aligned, and
@@ -532,11 +544,13 @@ intel_emit_linear_blit(struct brw_context *brw,
    */
   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
   height = (pitch == 0) ? 1 : size / pitch;
+   src_x = src_offset % 64;
+   dst_x = dst_offset % 64;
   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset, I915_TILING_NONE,
-			  pitch, dst_bo, dst_offset, I915_TILING_NONE,
-			  0, 0, /* src x/y */
-			  0, 0, /* dst x/y */
+			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+			  src_x, 0, /* src x/y */
+			  dst_x, 0, /* dst x/y */
 			  pitch, height, /* w, h */
 			  GL_COPY);
   if (!ok)
@@ -544,15 +558,18 @@ intel_emit_linear_blit(struct brw_context *brw,

   src_offset += pitch * height;
   dst_offset += pitch * height;
+   src_x = src_offset % 64;
+   dst_x = dst_offset % 64;
   size -= pitch * height;
   assert (size < (1 << 15));
   pitch = ALIGN(size, 4);
+
   if (size != 0) {
      ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset, I915_TILING_NONE,
-			     pitch, dst_bo, dst_offset, I915_TILING_NONE,
-			     0, 0, /* src x/y */
-			     0, 0, /* dst x/y */
+			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+			     src_x, 0, /* src x/y */
+			     dst_x, 0, /* dst x/y */
 			     size, 1, /* w, h */
 			     GL_COPY);
      if (!ok)
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -226,8 +226,30 @@ intelReadPixels(struct gl_context * ctx,

   if (_mesa_is_bufferobj(pack->BufferObj)) {
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 2, NULL, x, y, 0, width, height, 1,
-                                        format, type, pixels, pack))
+                                        format, type, pixels, pack)) {
+         /* _mesa_meta_pbo_GetTexSubImage() implements PBO transfers by
+          * binding the user-provided BO as a fake framebuffer and rendering
+          * to it.  This breaks the invariant of the GL that nothing is able
+          * to render to a BO, causing nondeterministic corruption issues
+          * because the render cache is not coherent with a number of other
+          * caches that the BO could potentially be bound to afterwards.
+          *
+          * This could be solved in the same way that we guarantee texture
+          * coherency after a texture is attached to a framebuffer and
+          * rendered to, but that would involve checking *all* BOs bound to
+          * the pipeline for the case we need to emit a cache flush due to
+          * previous rendering to any of them -- Including vertex, index,
+          * uniform, atomic counter, shader image, transform feedback,
+          * indirect draw buffers, etc.
+          *
+          * That would increase the per-draw call overhead even though it's
+          * very unlikely that any of the BOs bound to the pipeline has been
+          * rendered to via a PBO at any point, so it seems better to just
+          * flush here unconditionally.
+          */
+         intel_batchbuffer_emit_mi_flush(brw);
         return;
+      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
   }
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1126,7 +1126,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
 {
   static const mesa_format formats[] = {
      MESA_FORMAT_B5G6R5_UNORM,
-      MESA_FORMAT_B8G8R8A8_UNORM
+      MESA_FORMAT_B8G8R8A8_UNORM,
+      MESA_FORMAT_B8G8R8X8_UNORM
   };

   /* GLX_SWAP_COPY_OML is not supported due to page flipping. */
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -476,8 +476,15 @@ intel_get_tex_image(struct gl_context *ctx,
      if (_mesa_meta_pbo_GetTexSubImage(ctx, 3, texImage, 0, 0, 0,
                                        texImage->Width, texImage->Height,
                                        texImage->Depth, format, type,
-                                        pixels, &ctx->Pack))
+                                        pixels, &ctx->Pack)) {
+         /* Flush to guarantee coherency between the render cache and other
+          * caches the PBO could potentially be bound to after this point.
+          * See the related comment in intelReadPixels() for a more detailed
+          * explanation.
+          */
+         intel_batchbuffer_emit_mi_flush(brw);
         return;
+      }

      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);
   }
--- a/src/mesa/drivers/dri/swrast/swrast.c
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -60,6 +60,11 @@
 #include "swrast_priv.h"
 #include "swrast/s_context.h"

+#include <sys/types.h>
+#ifdef HAVE_SYS_SYSCTL_H
+# include <sys/sysctl.h>
+#endif
+
 const __DRIextension **__driDriverGetExtensions_swrast(void);

 const char * const swrast_vendor_string = "Mesa Project";
@@ -136,6 +141,16 @@ swrast_query_renderer_integer(__DRIscreen *psp, int param,
      value[0] = 0;
      return 0;
   case __DRI2_RENDERER_VIDEO_MEMORY: {
+      /* This should probably share code with os_get_total_physical_memory()
+       * from src/gallium/auxiliary/os/os_misc.c
+       */
+#if defined(CTL_HW) && defined(HW_MEMSIZE)
+        int mib[2] = { CTL_HW, HW_MEMSIZE };
+        unsigned long system_memory_bytes;
+        size_t len = sizeof(system_memory_bytes);
+        if (sysctl(mib, 2, &system_memory_bytes, &len, NULL, 0) != 0)
+            return -1;
+#elif defined(_SC_PHYS_PAGES) && defined(_SC_PAGE_SIZE)
      /* XXX: Do we want to return the full amount of system memory ? */
      const long system_memory_pages = sysconf(_SC_PHYS_PAGES);
      const long system_page_size = sysconf(_SC_PAGE_SIZE);
@@ -145,6 +160,9 @@ swrast_query_renderer_integer(__DRIscreen *psp, int param,

      const uint64_t system_memory_bytes = (uint64_t) system_memory_pages
         * (uint64_t) system_page_size;
+#else
+#error "Unsupported platform"
+#endif

      const unsigned system_memory_megabytes =
         (unsigned) (system_memory_bytes / (1024 * 1024));
--- a/src/mesa/drivers/osmesa/Makefile.am
+++ b/src/mesa/drivers/osmesa/Makefile.am
@@ -39,7 +39,6 @@ nodist_EXTRA_lib@OSMESA_LIB@_la_SOURCES = dummy.cpp
 lib@OSMESA_LIB@_la_SOURCES = osmesa.c

 lib@OSMESA_LIB@_la_LDFLAGS = \
-	-module \
 	-no-undefined \
 	-version-number @OSMESA_VERSION@ \
 	$(GC_SECTIONS) \
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .5.2
 .5.6