docs: Add 7.11.2 release notes

mesa: Bump version to 7.11.2 (final)
intel: Fix separate stencil in builtin DRI2 backend
2011-11-27 14:04:48 -08:00 · 2011-11-27 14:04:05 -08:00 · 2011-11-21 16:11:50 -08:00 · 2011-11-21 16:10:49 -08:00 · 2011-11-21 16:10:22 -08:00 · 2011-11-21 19:49:48 +01:00
185 changed files with 4010 additions and 1621 deletions
--- a/Makefile
+++ b/Makefile
@@ -183,7 +183,7 @@ ultrix-gcc:

 # Rules for making release tarballs

-VERSION=7.11-rc2
+VERSION=7.11.2
 DIRECTORY = Mesa-$(VERSION)
 LIB_NAME = MesaLib-$(VERSION)
 GLUT_NAME = MesaGLUT-$(VERSION)
@@ -285,9 +285,6 @@ MAIN_FILES = \
 	$(DIRECTORY)/src/mesa/drivers/osmesa/descrip.mms		\
 	$(DIRECTORY)/src/mesa/drivers/osmesa/osmesa.def			\
 	$(DIRECTORY)/src/mesa/drivers/osmesa/*.[ch]			\
-	$(DIRECTORY)/src/mesa/drivers/dri/r300/compiler/*.[ch]		\
-	$(DIRECTORY)/src/mesa/drivers/dri/r300/compiler/Makefile	\
-	$(DIRECTORY)/src/mesa/drivers/dri/r300/compiler/SConscript	\
 	$(DIRECTORY)/src/mesa/drivers/windows/*/*.[ch]			\
 	$(DIRECTORY)/src/mesa/drivers/windows/*/*.def			\
 	$(DIRECTORY)/src/mesa/drivers/x11/Makefile			\
@@ -393,6 +390,7 @@ DRI_FILES = \
 	$(DIRECTORY)/src/mesa/drivers/dri/*/*/*.[chS]			\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/Makefile			\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/*/Makefile			\
+	$(DIRECTORY)/src/mesa/drivers/dri/*/*/SConscript		\
 	$(DIRECTORY)/src/mesa/drivers/dri/*/Doxyfile

 SGI_GLU_FILES = \
@@ -481,13 +479,13 @@ rm_config: parsers
 	rm -f configs/autoconf

 $(LIB_NAME).tar: rm_config
-	cd .. ; tar -cf $(DIRECTORY)/$(LIB_NAME).tar $(LIB_FILES)
+	cd .. ; tar --dereference -cf $(DIRECTORY)/$(LIB_NAME).tar $(LIB_FILES)

 $(LIB_NAME).tar.gz: $(LIB_NAME).tar
 	gzip --stdout --best $(LIB_NAME).tar > $(LIB_NAME).tar.gz

 $(GLUT_NAME).tar:
-	cd .. ; tar -cf $(DIRECTORY)/$(GLUT_NAME).tar $(GLUT_FILES)
+	cd .. ; tar --dereference -cf $(DIRECTORY)/$(GLUT_NAME).tar $(GLUT_FILES)

 $(GLUT_NAME).tar.gz: $(GLUT_NAME).tar
 	gzip --stdout --best $(GLUT_NAME).tar > $(GLUT_NAME).tar.gz
--- a/configure.ac
+++ b/configure.ac
@@ -17,6 +17,10 @@ AC_INIT([Mesa],[mesa_version],
 AC_CONFIG_AUX_DIR([bin])
 AC_CANONICAL_HOST

+dnl Save user CFLAGS and CXXFLAGS so one can override the default ones
+USER_CFLAGS="$CFLAGS"
+USER_CXXFLAGS="$CXXFLAGS"
+
 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.24
 LIBDRM_RADEON_REQUIRED=2.4.24
@@ -85,7 +89,7 @@ if test "x$GCC" = xyes -a "x$CLANG" = xno; then
    GCC_VERSION=`$CC -dumpversion`
    if test $? -eq 0; then
        major=`echo $GCC_VERSION | cut -d. -f1`
-        minor=`echo $GCC_VERSION | cut -d. -f1`
+        minor=`echo $GCC_VERSION | cut -d. -f2`
    fi

    if test $major -lt 3 -o $major -eq 3 -a $minor -lt 3 ; then
@@ -829,7 +833,7 @@ xlib)
        GL_PC_LIB_PRIV="$GL_LIB_DEPS"
        GL_PC_CFLAGS="$X11_INCLUDES"
    fi
-    GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread"
+    GL_LIB_DEPS="$GL_LIB_DEPS $SELINUX_LIBS -lm -lpthread $DLOPEN_LIBS"
    GL_PC_LIB_PRIV="$GL_PC_LIB_PRIV $SELINUX_LIBS -lm -lpthread"

    # if static, move the external libraries to the programs
@@ -1240,7 +1244,7 @@ if test "x$enable_gbm" = xyes; then
                      AC_MSG_ERROR([gbm needs udev]))
    GBM_LIB_DEPS="$DLOPEN_LIBS $LIBUDEV_LIBS"

-    if test "x$enable_dri" = xyes; then
+    if test "$mesa_driver" = dri; then
        GBM_BACKEND_DIRS="$GBM_BACKEND_DIRS dri"
        if test "$SHARED_GLAPI" -eq 0; then
            AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi])
@@ -1806,6 +1810,11 @@ if test "x$with_gallium_drivers" != x; then
            gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau"
            ;;
        xswrast)
+            GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
+            if test "x$MESA_LLVM" = x1; then
+                GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS llvmpipe"
+            fi
+
            if test "x$HAVE_ST_DRI" = xyes; then
                GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast"
            fi
@@ -1824,6 +1833,10 @@ dnl Restore LDFLAGS and CPPFLAGS
 LDFLAGS="$_SAVE_LDFLAGS"
 CPPFLAGS="$_SAVE_CPPFLAGS"

+dnl Add user CFLAGS and CXXFLAGS
+CFLAGS="$CFLAGS $USER_CFLAGS"
+CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS"
+
 dnl Substitute the config
 AC_CONFIG_FILES([configs/autoconf])

--- a/docs/llvmpipe.html
+++ b/docs/llvmpipe.html
@@ -22,21 +22,20 @@ It's the fastest software rasterizer for Mesa.

 <h1>Requirements</h1>

-<dl>
-<dt>An x86 or amd64 processor.  64-bit mode is preferred.</dt>
-<dd>
+<ul>
+<li>
+	 <p>An x86 or amd64 processor; 64-bit mode recommended.</p>
   <p>
-   Support for sse2 is strongly encouraged.  Support for ssse3, and sse4.1 will
+   Support for SSE2 is strongly encouraged.  Support for SSSE3 and SSE4.1 will
   yield the most efficient code.  The less features the CPU has the more
   likely is that you ran into underperforming, buggy, or incomplete code.  
   </p>
   <p>
   See /proc/cpuinfo to know what your CPU supports.
   </p>
-</dd>
-<dt>LLVM. Version 2.8 recommended. 2.6 or later required.</dt>
-<dd>
-   <p>
+</li>
+<li>
+	 <p>LLVM: version 2.9 recommended; 2.6 or later required.</p>
   <b>NOTE</b>: LLVM 2.8 and earlier will not work on systems that support the
   Intel AVX extensions (e.g. Sandybridge).  LLVM's code generator will
   fail when trying to emit AVX instructions.  This was fixed in LLVM 2.9.
@@ -54,26 +53,25 @@ It's the fastest software rasterizer for Mesa.
 </pre>

   <p>
-   For Windows download pre-built MSVC 9.0 or MinGW binaries from
-   http://people.freedesktop.org/~jrfonseca/llvm/ and set the LLVM environment
-   variable to the extracted path.
-   </p>
+	 For Windows you will need to build LLVM from source with MSVC or MINGW
+	 (either natively or through cross compilers) and CMake, and set the LLVM
+	 environment variable to the directory you installed it to.

-   <p>
-   For MSVC there are two set of binaries: llvm-x.x-msvc32mt.7z and
-   llvm-x.x-msvc32mtd.7z .
-   </p>
+   LLVM will be statically linked, so when building on MSVC it needs to be
+   built with a matching CRT as Mesa, and you'll need to pass
+   -DLLVM_USE_CRT_DEBUG=MTd for debug and checked builds,
+   -DLLVM_USE_CRT_RELEASE=MT for profile and release builds.

-   <p>
-   You have to set the LLVM=/path/to/llvm-x.x-msvc32mtd env var when passing
-   debug=yes to scons, and LLVM=/path/to/llvm-x.x-msvc32mt when building with
-   debug=no. This is necessary as LLVM builds as static library so the chosen
-   MS CRT must match.
+   You can build only the x86 target by passing -DLLVM_TARGETS_TO_BUILD=X86
+   to cmake.
   </p>
-</dd>
+</li>
+
+<li>
+   <p>scons (optional)</p>
+</li>
+</ul>

-<dt>scons (optional)</dt>
-</dl>



@@ -93,7 +91,7 @@ Alternatively, you can build it with GNU make, if you prefer, by invoking it as

 but the rest of these instructions assume that scons is used.

-For windows is everything the except except the winsys:
+For Windows the procedure is similar except the target:

 <pre>
  scons build=debug libgl-gdi
--- a/docs/news.html
+++ b/docs/news.html
@@ -11,6 +11,20 @@
 <H1>News</H1>


+<h2>November 17, 2011</h2>
+
+<p>
+<a href="relnotes-7.11.1.html">Mesa 7.11.1</a> is released.  This is a bug fix
+release.
+</p>
+
+<h2>July 31, 2011</h2>
+
+<p>
+<a href="relnotes-7.11.html">Mesa 7.11</a> is released.  This is a new
+release with many new features.
+</p>
+
 <h2>June 13, 2011</h2>

 <p>
--- a/docs/relnotes-7.11.1.html
+++ b/docs/relnotes-7.11.1.html
@@ -0,0 +1,391 @@
+<HTML>
+
+<head>
+<TITLE>Mesa Release Notes</TITLE>
+<link rel="stylesheet" type="text/css" href="mesa.css">
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+</head>
+
+<BODY>
+
+<body bgcolor="#eeeeee">
+
+<H1>Mesa 7.11.1 Release Notes / November 17, 2011</H1>
+
+<p>
+Mesa 7.11.1 is a bug fix release which fixes bugs found since the 7.11 release.
+</p>
+<p>
+Mesa 7.11 implements the OpenGL 2.1 API, but the version reported by
+glGetString(GL_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 2.1.
+</p>
+<p>
+See the <a href="install.html">Compiling/Installing page</a> for prerequisites
+for DRI hardware acceleration.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+ac0181a4076770fb657c1169af43aa09  MesaLib-7.11.1.tar.gz
+a77307102cee844ff6544ffa8fafeac1  MesaLib-7.11.1.tar.bz2
+dfcb11516c1730f3981b55a65a835623  MesaLib-7.11.1.zip
+2cb2b9ecb4fb7d1a6be69346ee886952  MesaGLUT-7.11.1.tar.gz
+3f54e314290d4dacbab089839197080b  MesaGLUT-7.11.1.tar.bz2
+5d66c7ee8c5cc2f27e1ffb037ad4172c  MesaGLUT-7.11.1.zip
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=3165">Bug 3165</a> - texImage.IsCompressed and texImage.CompressedSize issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=23525">Bug 23525</a> - Software rendering on QEMU guests badly broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28125">Bug 28125</a> - DRI2 prevents indirect glx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34628">Bug 34628</a> - [ilk] skybox errors in quake4</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36371">Bug 36371</a> - r200: piglit readPixSanity failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36669">Bug 36669</a> - EmitNoMainReturn set to 1 doesn't make the GLSL compiler lower all the RET opcodes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36939">Bug 36939</a> - multitexturing is messed up in quake wars (regression)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37907">Bug 37907</a> - [swrast] SIGSEGV swrast/s_depth.c:569</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38163">Bug 38163</a> - Gnome Shell Display Bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38625">Bug 38625</a> - ast_to_hir.cpp:1761: const glsl_type* process_array_type(YYLTYPE*, const glsl_type*, ast_node*, _mesa_glsl_parse_state*): Assertion `dummy_instructions.is_empty()' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38729">Bug 38729</a> - [softpipe] sp_quad_depth_test.c:215:convert_quad_stencil: Assertion `0' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38863">Bug 38863</a> - [IVB]GPU hang when running 3D games like openarena</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39193">Bug 39193</a> - [llvmpipe and r600g] glCheckFramebufferStatusEXT segfaults in Gallium when checking status on a framebuffer bound to a texture that's bound to a pixmap</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39651">Bug 39651</a> - [glsl] Assertion failure when implicitly converting out parameters</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39991">Bug 39991</a> - [regression]GL_PALETTE8_RGBA8_OES format of glCompressedTexImage2D will cause err GL_INVALID_ENUM with GLES1.x</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40022">Bug 40022</a> - [i915] out-of-bounds write src/mesa/drivers/dri/i915/i915_fragprog.c:321</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40062">Bug 40062</a> - in etqw the strogg radar is black (regression)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40324">Bug 40324</a> - [SNB] gpu hang in mesa 7.11</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40533">Bug 40533</a> - i915: piglit glean/readPixSanity: DRI2SwapBuffers: BadDrawable (invalid Pixmap or Window parameter)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=41096">Bug 41096</a> - [sandybridge-m-gt2+] GPU lockup render.IPEHR: 0x7a000002</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=41969">Bug 41969</a> - The Mesa meta save/restore code doesn't always save the active program</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=42175">Bug 42175</a> - RV730: Display errors in glxgears &amp; WebGL</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=42268">Bug 42268</a> - [bisected] oglc pbo(negative.invalidOffsetValue) aborts on 7.11 branch</li>
+
+<!-- <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=">Bug </a> - </li> -->
+
+</ul>
+
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-7.11..mesa-7.11.1
+</pre>
+
+<p>Adam Jackson (2):
+<ul>
+  <li>drisw: Remove cargo culting that breaks GLX 1.3 ctors</li>
+  <li>glx: Don't enable INTEL_swap_event unconditionally</li>
+</ul></p>
+
+<p>Alex Deucher (1):
+<ul>
+  <li>r600g: fix up vs export handling</li>
+</ul></p>
+
+<p>Ben Widawsky (1):
+<ul>
+  <li>intel: GetBuffer fix</li>
+</ul></p>
+
+<p>Brian Paul (15):
+<ul>
+  <li>docs: add 7.11 md5 sums</li>
+  <li>docs: news item for 7.11 release</li>
+  <li>st/mesa: Convert size assertions to conditionals in st_texture_image_copy.</li>
+  <li>softpipe: add missing stencil format case in convert_quad_stencil()</li>
+  <li>mesa: fix texstore addressing bugs for depth/stencil formats</li>
+  <li>mesa: add missing breaks for GL_TEXTURE_CUBE_MAP_SEAMLESS queries</li>
+  <li>swrast: don't try to do depth testing if there's no depth buffer</li>
+  <li>meta: fix/add checks for GL_EXT_framebuffer_sRGB</li>
+  <li>mesa: fix PACK_COLOR_5551(), PACK_COLOR_1555() macros</li>
+  <li>meta: fix broken sRGB mipmap generation</li>
+  <li>mesa: add _NEW_CURRENT_ATTRIB in _mesa_program_state_flags()</li>
+  <li>mesa: fix error handling for dlist image unpacking</li>
+  <li>mesa: generate GL_INVALID_OPERATION in glIsEnabledIndex() between Begin/End</li>
+  <li>mesa: fix incorrect error code in _mesa_FramebufferTexture1D/3DEXT()</li>
+  <li>mesa: fix format/type check in unpack_image() for bitmaps</li>
+</ul></p>
+
+<p>Carl Simonson (1):
+<ul>
+  <li>i830: Add missing vtable entry for i830 from the hiz work.</li>
+</ul></p>
+
+<p>Carl Worth (5):
+<ul>
+  <li>glcpp: Fix two (or more) successive applications of token pasting</li>
+  <li>glcpp: Test a non-function-like macro using the token paste operator</li>
+  <li>glcpp: Implement token pasting for non-function-like macros</li>
+  <li>glcpp: Raise error if defining any macro containing two consecutive underscores</li>
+  <li>glcpp: Add a test for #elif with an undefined macro.</li>
+</ul></p>
+
+<p>Chad Versace (5):
+<ul>
+  <li>glsl: Add method glsl_type::can_implicitly_convert_to()</li>
+  <li>glsl: Fix implicit conversions in non-constructor function calls</li>
+  <li>glsl: Remove ir_function.cpp:type_compare()</li>
+  <li>glsl: Fix conversions in array constructors</li>
+  <li>x86-64: Fix compile error with clang</li>
+</ul></p>
+
+<p>Chia-I Wu (3):
+<ul>
+  <li>glsl: empty declarations should be valid</li>
+  <li>intel: rename intel_extensions_es2.c to intel_extensions_es.c</li>
+  <li>intel: fix GLESv1 support</li>
+</ul></p>
+
+<p>Chris Wilson (1):
+<ul>
+  <li>i915: out-of-bounds write in calc_live_regs()</li>
+</ul></p>
+
+<p>Christopher James Halse Rogers (1):
+<ul>
+  <li>glx/dri2: Paper over errors in DRI2Connect when indirect</li>
+</ul></p>
+
+<p>David Reveman (1):
+<ul>
+  <li>i915g: Fix off-by-one in scissors.</li>
+</ul></p>
+
+<p>Eric Anholt (16):
+<ul>
+  <li>mesa: Don't skip glGetProgramEnvParam4dvARB if there was already an error.</li>
+  <li>mesa: Fix glGetUniform() type conversions.</li>
+  <li>mesa: Add support for Begin/EndConditionalRender in display lists.</li>
+  <li>mesa: Throw an error instead of asserting for condrender with query == 0.</li>
+  <li>mesa: Throw an error when starting conditional render on an active query.</li>
+  <li>mesa: Don't skip glGetProgramLocalParam4dvARB if there was already an error.</li>
+  <li>glsl: Allow ir_assignment() constructor to not specify condition.</li>
+  <li>glsl: Clarify error message about whole-array assignment in GLSL 1.10.</li>
+  <li>glsl: When assigning to a whole array, mark the array as accessed.</li>
+  <li>glsl: When assiging from a whole array, mark it as used.</li>
+  <li>i965/fs: Respect ARB_color_buffer_float clamping.</li>
+  <li>i965: Add missing _NEW_POLYGON flag to polygon stipple upload.</li>
+  <li>i965: Fix polygon stipple offset state flagging.</li>
+  <li>intel: Mark MESA_FORMAT_X8_Z24 as always supported.</li>
+  <li>mesa: Don't error on glFeedbackBuffer(size = 0, buffer = NULL)</li>
+  <li>glsl: Fix gl_NormalMatrix swizzle setup to match i965's invariants.</li>
+</ul></p>
+
+<p>Henri Verbeet (6):
+<ul>
+  <li>mesa: Also set the remaining draw buffers to GL_NONE when updating just the first buffer in _mesa_drawbuffers().</li>
+  <li>r600g: Support the PIPE_FORMAT_R16_FLOAT colorformat.</li>
+  <li>mesa: Check the texture against all units in unbind_texobj_from_texunits().</li>
+  <li>mesa: Allow sampling from units &gt;= MAX_TEXTURE_UNITS in shaders.</li>
+  <li>mesa: Use the Elements macro for the sampler index assert in validate_samplers().</li>
+  <li>mesa: Fix a couple of TexEnv unit limits.</li>
+</ul></p>
+
+<p>Ian Romanick (17):
+<ul>
+  <li>mesa: Add utility function to get base format from a GL compressed format</li>
+  <li>mesa: Return the correct internal fmt when a generic compressed fmt was used</li>
+  <li>mesa: Make _mesa_get_compressed_formats match the texture compression specs</li>
+  <li>linker: Make linker_error set LinkStatus to false</li>
+  <li>linker: Make linker_{error,warning} generally available</li>
+  <li>mesa: Ensure that gl_shader_program::InfoLog is never NULL</li>
+  <li>ir_to_mesa: Use Add linker_error instead of fail_link</li>
+  <li>ir_to_mesa: Emit warnings instead of errors for IR that can't be lowered</li>
+  <li>i915: Fail without crashing if a Mesa IR program uses too many registers</li>
+  <li>i915: Only emit program errors when INTEL_DEBUG=wm or INTEL_DEBUG=fallbacks</li>
+  <li>mesa: Add GL_OES_compressed_paletted_texture formats to _mesa_is_compressed_format</li>
+  <li>mesa: Add GL_OES_compressed_paletted_texture formats to _mesa_base_tex_format</li>
+  <li>mesa: Refactor expected texture size check in cpal_get_info</li>
+  <li>mesa: Add _mesa_cpal_compressed_format_type</li>
+  <li>mesa: Refactor compressed texture error checks to work with paletted textures</li>
+  <li>mesa: Remove redundant compressed paletted texture error checks</li>
+  <li>mesa: Advertise GL_OES_compressed_paletted_texture in OpenGL ES1.x</li>
+</ul></p>
+
+<p>Jeremy Huddleston (3):
+<ul>
+  <li>apple: Silence some debug spew</li>
+  <li>apple: Use the correct (OpenGL.framework) glViewport and glScissor during init</li>
+  <li>apple: Implement applegl_unbind_context</li>
+</ul></p>
+
+<p>José Fonseca (1):
+<ul>
+  <li>docs: Update llvmpipe docs.</li>
+</ul></p>
+
+<p>Kenneth Graunke (12):
+<ul>
+  <li>glsl: Avoid massive ralloc_strndup overhead in S-Expression parsing.</li>
+  <li>mesa: In validate_program(), initialize errMsg for safety.</li>
+  <li>i965/gen5+: Fix incorrect miptree layout for non-power-of-two cubemaps.</li>
+  <li>i965: Use proper texture alignment units for cubemaps on Gen5+.</li>
+  <li>i965: Fix incorrect maximum PS thread count shift on Ivybridge.</li>
+  <li>i965: Emit depth stalls and flushes before changing depth state on Gen6+.</li>
+  <li>i965/fs: Allow SIMD16 with control flow on Ivybridge.</li>
+  <li>i965: Allow SIMD16 color writes on Ivybridge.</li>
+  <li>i965: Fix inconsistent indentation in brw_eu_emit.c.</li>
+  <li>intel: Depth format fixes</li>
+  <li>i965: Apply post-sync non-zero workaround to homebrew workaround.</li>
+  <li>mesa/get: Move MAX_LIGHTS from GL/ES2 to GL/ES1.</li>
+</ul></p>
+
+<p>Kristian Høgsberg (1):
+<ul>
+  <li>glx: Don't flush twice if we fallback to dri2CopySubBuffer</li>
+</ul></p>
+
+<p>Marc Pignat (1):
+<ul>
+  <li>drisw: Fix 24bpp software rendering, take 2</li>
+</ul></p>
+
+<p>Marcin Baczyński (2):
+<ul>
+  <li>configure: fix gcc version check</li>
+  <li>configure: allow C{,XX}FLAGS override</li>
+</ul></p>
+
+<p>Marcin Slusarz (3):
+<ul>
+  <li>nouveau: fix nouveau_fence leak</li>
+  <li>nouveau: fix crash during fence emission</li>
+  <li>nouveau: fix fence hang</li>
+</ul></p>
+
+<p>Marek Olšák (19):
+<ul>
+  <li>vbo: do not call _mesa_max_buffer_index in debug builds</li>
+  <li>winsys/radeon: fix space checking</li>
+  <li>r300/compiler: fix a warning that a variable may be uninitialized</li>
+  <li>r300/compiler: remove an unused-but-set variable and simplify the code</li>
+  <li>u_vbuf_mgr: cleanup original vs real vertex buffer arrays</li>
+  <li>u_vbuf_mgr: don't take per-instance attribs into acc. when computing max index</li>
+  <li>u_vbuf_mgr: fix max_index computation for large src_offset</li>
+  <li>u_vbuf_mgr: s/u_vbuf_mgr_/u_vbuf_</li>
+  <li>u_vbuf_mgr: remove unused flag U_VBUF_UPLOAD_FLUSHED</li>
+  <li>u_vbuf_mgr: rework user buffer uploads</li>
+  <li>u_vbuf_mgr: fix uploading with a non-zero index bias</li>
+  <li>configure.ac: fix xlib-based softpipe build</li>
+  <li>r600g: add index_bias to index buffer bounds</li>
+  <li>r300g: fix rendering with a non-zero index bias in draw_elements_immediate</li>
+  <li>Revert "r300g: fix rendering with a non-zero index bias in draw_elements_immediate"</li>
+  <li>pb_bufmgr_cache: flush cache when create_buffer fails and try again</li>
+  <li>r300g: don't return NULL in resource_from_handle if the resource is too small</li>
+  <li>r600g: set correct tiling flags in depth info</li>
+  <li>r300g: don't call u_trim_pipe_prim in r300_swtcl_draw_vbo</li>
+</ul></p>
+
+<p>Michel Dänzer (4):
+<ul>
+  <li>st/mesa: Finalize texture on render-to-texture.</li>
+  <li>glx/dri2: Don't call X server for SwapBuffers when there's no back buffer.</li>
+  <li>gallium/util: Add macros for converting from little endian to CPU byte order.</li>
+  <li>r300g: Fix queries on big endian hosts.</li>
+</ul></p>
+
+<p>Neil Roberts (1):
+<ul>
+  <li>meta: Fix saving the active program</li>
+</ul></p>
+
+<p>Paul Berry (18):
+<ul>
+  <li>glsl: Lower unconditional return statements.</li>
+  <li>glsl: Refactor logic for determining whether to lower return statements.</li>
+  <li>glsl: lower unconditional returns and continues in loops.</li>
+  <li>glsl: Use foreach_list in lower_jumps.cpp</li>
+  <li>glsl: In lower_jumps.cpp, lower both branches of a conditional.</li>
+  <li>glsl: Lower break instructions when necessary at the end of a loop.</li>
+  <li>glsl: improve the accuracy of the radians() builtin function</li>
+  <li>glsl: improve the accuracy of the atan(x,y) builtin function.</li>
+  <li>Revert "glsl: Skip processing the first function's body in do_dead_functions()."</li>
+  <li>glsl: Emit function signatures at toplevel, even for built-ins.</li>
+  <li>glsl: Constant-fold built-in functions before outputting IR</li>
+  <li>glsl: Check array size is const before asserting that no IR was generated.</li>
+  <li>glsl: Perform implicit type conversions on function call out parameters.</li>
+  <li>glsl: Fix type error when lowering integer divisions</li>
+  <li>glsl: Rework oversize array check for gl_TexCoord.</li>
+  <li>glsl: Remove field array_lvalue from ir_variable.</li>
+  <li>glsl hierarchical visitor: Do not overwrite base_ir for parameter lists.</li>
+  <li>glsl: improve the accuracy of the asin() builtin function.</li>
+</ul></p>
+
+<p>Tobias Droste (1):
+<ul>
+  <li>r300/compiler: simplify code in peephole_add_presub_add</li>
+</ul></p>
+
+<p>Tom Fogal (1):
+<ul>
+  <li>Only use gcc visibility support with gcc4+.</li>
+</ul></p>
+
+<p>Tom Stellard (1):
+<ul>
+  <li>r300/compiler: Fix regalloc for values with multiple writers</li>
+</ul></p>
+
+<p>Vadim Girlin (5):
+<ul>
+  <li>st/mesa: flush bitmap cache on query and conditional render boundaries</li>
+  <li>r600g: use backend mask for occlusion queries</li>
+  <li>r600g: take into account force_add_cf in pops</li>
+  <li>r600g: fix check_and_set_bank_swizzle</li>
+  <li>r600g: fix replace_gpr_with_pv_ps</li>
+</ul></p>
+
+<p>Yuanhan Liu (17):
+<ul>
+  <li>i965: fix the constant interp bitmask for flat mode</li>
+  <li>mesa: fix error handling for glEvalMesh1/2D</li>
+  <li>mesa: fix error handling for some glGet* functions</li>
+  <li>mesa: fix error handling for glTexEnv</li>
+  <li>mesa: fix error handling for glIsEnabled</li>
+  <li>mesa: fix error handling for glPixelZoom</li>
+  <li>mesa: fix error handling for glSelectBuffer</li>
+  <li>mesa: fix error handling for glMapBufferRange</li>
+  <li>mesa: fix error handling for glMaterial*</li>
+  <li>intel: fix the wrong code to detect null texture.</li>
+  <li>mesa: add a function to do the image data copy stuff for save_CompressedTex(Sub)Image</li>
+  <li>i965: setup address rounding enable bits</li>
+  <li>mesa: generate error if pbo offset is not aligned with the size of specified type</li>
+  <li>mesa: fix inverted pbo test error at _mesa_GetnCompressedTexImageARB</li>
+  <li>mesa: handle the pbo case for save_Bitmap</li>
+  <li>mesa: handle PBO access error in display list mode</li>
+  <li>intel: don't call unmap pbo if pbo is not mapped</li>
+</ul></p>
+
+</body>
+</html>
--- a/docs/relnotes-7.11.2.html
+++ b/docs/relnotes-7.11.2.html
@@ -0,0 +1,80 @@
+<HTML>
+
+<head>
+<TITLE>Mesa Release Notes</TITLE>
+<link rel="stylesheet" type="text/css" href="mesa.css">
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+</head>
+
+<BODY>
+
+<body bgcolor="#eeeeee">
+
+<H1>Mesa 7.11.2 Release Notes / November 27, 2011</H1>
+
+<p>
+Mesa 7.11.2 is a bug fix release which fixes bugs found since the 7.11.1 release.
+</p>
+<p>
+Mesa 7.11 implements the OpenGL 2.1 API, but the version reported by
+glGetString(GL_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 2.1.
+</p>
+<p>
+See the <a href="install.html">Compiling/Installing page</a> for prerequisites
+for DRI hardware acceleration.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+TBD
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=43143">Bug 43143</a> - Mesa 7.11.1 fails to build at main/dlist.c:4532 with error message: "format not a string literal and no format arguments"</li>
+
+<li>Incorrect handling of CopyTexImage from RGBA window to LA texture.</li>
+
+<!-- <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=">Bug </a> - </li> -->
+</ul>
+
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-7.11.1..mesa-7.11.2
+</pre>
+
+<p>Brian Paul (4):
+<ul>
+  <li>mesa: stop using ctx-&gt;Driver.CopyTexImage1D/2D() hooks</li>
+  <li>mesa: fix format selection for meta CopyTexSubImage()</li>
+  <li>docs: update news.html and relnotes.html for 7.11.1 release</li>
+  <li>mesa: use format string in _mesa_error() call to silence warning</li>
+</ul></p>
+
+<p>Chad Versace (3):
+<ul>
+  <li>intel: Simplify stencil detiling arithmetic</li>
+  <li>intel: Fix region dimensions for stencil buffers received from DDX</li>
+  <li>intel: Fix separate stencil in builtin DRI2 backend</li>
+</ul></p>
+
+<p>Ian Romanick (4):
+<ul>
+  <li>docs: Add 7.11.1 release md5sums</li>
+  <li>mesa: set version string to 7.11.2-devel</li>
+  <li>mesa: Bump version to 7.11.2 (final)</li>
+</ul></p>
+
+</body>
+</html>
--- a/docs/relnotes-7.11.html
+++ b/docs/relnotes-7.11.html
@@ -10,7 +10,7 @@

 <body bgcolor="#eeeeee">

-<H1>Mesa 7.11 Release Notes / (release date TBD)</H1>
+<H1>Mesa 7.11 Release Notes / July 31, 2011</H1>

 <p>
 Mesa 7.11 is a new development release.
@@ -30,7 +30,12 @@ for DRI hardware acceleration.

 <h2>MD5 checksums</h2>
 <pre>
-tbd
+fa2c7068503133fb2453244cda11cb2a  MesaLib-7.11.tar.gz
+ff03aca82d0560009a076a87c888cf13  MesaLib-7.11.tar.bz2
+ede1ac0976f6f05df586093fc17d63ed  MesaLib-7.11.zip
+b4fb81a47c5caedaefad49af7702c23d  MesaGLUT-7.11.tar.gz
+77a9a0bbd7f8bca882aa5709b88cb071  MesaGLUT-7.11.tar.bz2
+c19ef0c6eb61188c96ed4ccedd70717c  MesaGLUT-7.11.zip
 </pre>


@@ -64,11 +69,263 @@ tbd
 <li>GL_ATI_texture_float (gallium, i965)
 <li>GL_NV_conditional_render (i965)
 <li>GL_NV_texture_barrier (gallium drivers)
+<li>Enable 16-wide fragment shader execution in i965 driver.  This should improve performance in many applications.
+<li>Initial alpha-level support for Intel "Ivybridge" chipsets in the i965 driver.
 </ul>


 <h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.  This list only includes bug fixes not
+included in the previous release (7.10.3).  Many of these are regressions that
+did not exist in the 7.10 release series at all.</p>
+
 <ul>
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=25871">Bug 25871</a> - nearest neighbour samples wrong texel (precision/rounding problem)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=29162">Bug 29162</a> - mesa/darwin is severly broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=30080">Bug 30080</a> - [i915] piglit nodepth-test fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=30217">Bug 30217</a> - Possible sources of memory leaks reported by valgrind</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=30266">Bug 30266</a> - Regression, segfault in libdrm_intel when calling glBitmap</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=31561">Bug 31561</a> - [i915] intel_regions.c:289: intel_region_release: Assertion `region-&gt;map_refcount == 0' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=31744">Bug 31744</a> - [GLSL] overriding built-in function impacts another shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32308">Bug 32308</a> - [llvmpipe] src/gallium/auxiliary/gallivm/lp_bld_init.c:319:gallivm_register_garbage_collector_callback: Assertion `NumCallbacks &lt; 32' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32309">Bug 32309</a> - [softpipe] SIGSEGV sp_state_derived.c:204</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32459">Bug 32459</a> - [softpipe] glean depthStencil regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32460">Bug 32460</a> - [softpipe] piglit texwrap 1D regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32534">Bug 32534</a> - [arrandale/sandybridge] Mesa swallowing GEM ioctl failures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32560">Bug 32560</a> - To fix: 64-bit-portabilty-issue state_tracker/st_program.c:427</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32634">Bug 32634</a> - [r300g, bisected] Massive corruption in Unigine Sanctuary</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32768">Bug 32768</a> - VBO rendering using glDrawArrays causes program termination and "cs IB too big" message</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32804">Bug 32804</a> - [swrast] glean pixelFormats regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32814">Bug 32814</a> - Build error in osmesa.c due to change in _mesa_update_framebuffer_visual() signature</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32859">Bug 32859</a> - Mesa doesn't compile under NetBSD</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32912">Bug 32912</a> - [bisected, r300g] Unigine Sanctuary: r300_emit.c:902:r300_emit_vertex_arrays: Assertion `&amp;buf-&gt;b.b' failed with RADEON_HYPERZ=1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=32945">Bug 32945</a> - [RADEON:KMS:R300G] HiZ: Weird behavior with 3 pipes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33046">Bug 33046</a> - [bisected]glean/pixelFormats and 3 oglc cases segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33185">Bug 33185</a> - [RADEON:KMS:R300G] X crashes when kwin effects are turned on</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33215">Bug 33215</a> - [llvmpipe] piglit fbo-drawbuffers2-blend regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33247">Bug 33247</a> - [swrast] tnl/t_draw.c:471: _tnl_draw_prims: Assertion `prim[i].num_instances &gt; 0' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33284">Bug 33284</a> - [llvmpipe] piglit fbo-drawbuffers-fragcolor fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33306">Bug 33306</a> - [glsl] GLSL integer division by zero crashes GLSL compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33353">Bug 33353</a> - [softpipe] piglit fbo-srgb looks incorrect</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33360">Bug 33360</a> - inclusion of $(TALLOC_LIBS) in src/mesa/drivers/osmesa/Makefile causes a build failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33374">Bug 33374</a> - [bisect] FTBFS on commit 9767d3b5 (glapi: Fix OpenGL ES 1.1 and 2.0 interop)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33508">Bug 33508</a> - [glsl] GLSL compiler modulus by zero crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33555">Bug 33555</a> - [softpipe] tgsi/tgsi_sse2.c:1527:emit_tex: Assertion `0' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33885">Bug 33885</a> - [glsl] GLSL compiler allows recursion</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33823">Bug 33823</a> - [glsl] ralloc.c:78: get_header: Assertion `info-&gt;canary == 0x5A1106' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33934">Bug 33934</a> - 3D blitting is orders of magnitude slower than equivalent 2D blitting.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=33946">Bug 33946</a> - Crash: Mesa checks for invalid pointer, then uses it anyway.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34008">Bug 34008</a> - r600g: piglit failure (regression)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34009">Bug 34009</a> - Automatic Mipmap Generation produces very blurry image.</li>
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34042">Bug 34042</a> - Surfaceless eglMakeCurrent() fails if the supplied EGLContext is not a dummy context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34049">Bug 34049</a> - r600g: assertion failure (regression)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34119">Bug 34119</a> - [glsl] piglit glsl-texcoord-array regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34323">Bug 34323</a> - [i915 GLSL gles2] gl_FragCoord.w not correct</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34346">Bug 34346</a> - src/glsl relies on $PWD which can be unset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34378">Bug 34378</a> - st/mesa: 2a904fd6a0cb80eec6dec2bae07fd8778b04caf3 breaks sauerbraten</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34419">Bug 34419</a> - Kwin crashes screensaver exits</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34463">Bug 34463</a> - state_tracker/st_texture.c:370:st_texture_image_copy: Assertion `u_minify(src-&gt;width0, srcLevel) == width' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34468">Bug 34468</a> - src/glsl/Makefile fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34541">Bug 34541</a> - [ilk, wine] massive render corruption after recent patchset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34589">Bug 34589</a> - [pineview bisected]many cases regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34595">Bug 34595</a> - [bisected piketon]oglc half_float_vertex(misc.fillmode.wireframe) regressed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34597">Bug 34597</a> - [bisected piketon]oglc blend-constcolor and 7 draw-buffers2 subcases regressed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34603">Bug 34603</a> - [bisected piketon]oglc vbo subcase basic.bufferdata regressed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34604">Bug 34604</a> - [bisected piketon]piglit fbo/fbo-depth-sample-compare regressed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34646">Bug 34646</a> - [bisected piketon]ogles2conform GL2Tests/GL/gl_FragCoord/gl_FragCoord_w_frag.test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34656">Bug 34656</a> - i965: Crash when running WebGL Conformance Test in firefox-4 nightly build</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34691">Bug 34691</a> - [GLSL] matrix array member assignment with a complex subscript fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34906">Bug 34906</a> - [Pineview] Some WebGL conformance tests will crash firefox</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=34968">Bug 34968</a> - Bad fps in Lightsmark benchmark</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35025">Bug 35025</a> - [Patch] Serious compiler warnings</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35312">Bug 35312</a> - r600g: Automatic mipmap generation doesn't work properly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35373">Bug 35373</a> - [[GM45] OpenGL] GL_EXT_texture_sRGB_decode broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35434">Bug 35434</a> - [RADEON:KMS:R600G] etqw: broken ground textures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35441">Bug 35441</a> - [PATCH] Mesa does not find nouveau include files with --enable-shared-dricore</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35614">Bug 35614</a> - [SNB] random hang on piglit case shaders/glsl-max-varyings</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35820">Bug 35820</a> - [bisected SNB] System hangs when Gnome with compiz start up</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35822">Bug 35822</a> - [bisected pineview] many cases related to depth and stencil failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35849">Bug 35849</a> - when sampling textures from both fragment and vertex shaders the vertex texture has the incorrect texture bound</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35852">Bug 35852</a> - [bisected pineview] oglc case pxconv-read failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=35961">Bug 35961</a> - src/gallium/auxiliary/util/u_draw.c:77:util_draw_max_index: Assertion `buffer_size - format_size &lt;= buffer_size' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36032">Bug 36032</a> - piglit fdo9833 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36033">Bug 36033</a> - main/shaderapi.c:1044: validate_samplers: Assertion `(sizeof(targetName)/sizeof(*(targetName))) == NUM_TEXTURE_TARGETS' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36086">Bug 36086</a> - [wine] Segfault r300_resource_copy_region with some wine apps and RADEON_HYPERZ</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36182">Bug 36182</a> - Game Trine from http://www.humblebundle.com/ needs ATI_draw_buffers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36268">Bug 36268</a> - [r300g, bisected] minor flickering in Unigine Sanctuary</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36282">Bug 36282</a> - 34a5d3b9f4740601708c82093e2114356d749e65: glxgears segfaults when compiled with shared glapi</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36333">Bug 36333</a> - can't build demos if mesa build with --enable-selinux</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36473">Bug 36473</a> - [bisected] piglit bugs/fdo23670-depth_test failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36572">Bug 36572</a> - [bisected]oglc case texenv segfaults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36609">Bug 36609</a> - 45920d2ecb38b14fdda5253fecce996570c22863 breaks sauerbraten on r300g</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36648">Bug 36648</a> - [bisected SNB]piglit fbo/fbo-alphatest-nocolor failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36649">Bug 36649</a> - [bisected SNB]oglc draw-buffers2 failed with 16-wide</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36753">Bug 36753</a> - Some textures now rendered as completely black after register allocator rewrite.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36821">Bug 36821</a> - [bisected SNB]oglc api-texcoord causes GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36914">Bug 36914</a> - r600g: add rv670 flushing workaround. Causes games and some mesa demos to segfault.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36917">Bug 36917</a> - Rendering glitches in ETQW</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36939">Bug 36939</a> - multitexturing is messed up in quake wars (regression)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=36987">Bug 36987</a> - Intel GMA 4500 ARB_shader_texture_lod support</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37028">Bug 37028</a> - Amnesia/HPL2 Demo: Strange graphical bugs on r600g</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37150">Bug 37150</a> - sRGB textures are too bright in Starcraft 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37154">Bug 37154</a> - main/texstore.c:4187: _mesa_texstore_rgb9_e5: Assertion `baseInternalFormat == 0x1907' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37157">Bug 37157</a> - [bisected] KDE KWin crashes on start with delayed BO mapping</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37168">Bug 37168</a> - Regression: Severe memory leak when running Second Life</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37366">Bug 37366</a> - [i965 bisected ILK] Fragment shader discard tests occasionally fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37383">Bug 37383</a> - incorrect GLSL optimization</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37476">Bug 37476</a> - [wine] Devil May Cry 4: TXD tgsi opcode unsupported / translation from TGSI failed / missing vertex shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37743">Bug 37743</a> - [bisected i965]oglc GLSLlinker subcase negative.varying.beyondMaxVaryingFloats aborted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37766">Bug 37766</a> - Crash in dri2InvalidateBuffers when resizing Java window with OpenGL pipeline enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37839">Bug 37839</a> - main/teximage.c:2393: _mesa_choose_texture_format: Assertion `f != MESA_FORMAT_NONE' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37934">Bug 37934</a> - Corruption with topogun trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38015">Bug 38015</a> - Some extensions enabled even when not supported by the underlying driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38134">Bug 38134</a> - [bisected i965]piglit fbo/fbo-blit-d24s8 crashed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38145">Bug 38145</a> - r600g/evergreen: Incorrect rendering of some effects in doom3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38440">Bug 38440</a> - ETQW: Model in team select rendering too bright</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38566">Bug 38566</a> - [regression] ETQW crashes with 21972c85ea734dbfcf69629c6b0b940efb42d4ba</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38584">Bug 38584</a> - MESA_GLSL=dump causes SEGV in ir_print_visitor::unique_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38599">Bug 38599</a> - THe value of WGL_PBUFFER_HEIGHT_ARB is returned as width in wglQueryPbufferARB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38602">Bug 38602</a> - [bisected] Wrong display after "prefer native texture formats when possible" commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38624">Bug 38624</a> - program/ir_to_mesa.cpp:1440: virtual void ir_to_mesa_visitor::visit(ir_dereference_variable*): Assertion `var-&gt;location != -1' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38626">Bug 38626</a> - vbo: Don't discount stride breaks piglit on softpipe/r600g</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38649">Bug 38649</a> - piglit fbo-copyteximage regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38762">Bug 38762</a> - [IVB bisected]3D demos like glxgears abort</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38771">Bug 38771</a> - [[GM45] DRI] GPU hangs with current Mesa GIT when running certain OpenGL applications</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38842">Bug 38842</a> - Various valid GLX attributes are rejected by MESA glxChooseFBConfig</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38971">Bug 38971</a> - [bisected]oglc glsl-autointconv subcase negative.function.ambiguousMatch failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38987">Bug 38987</a> - sampler allowed as non-uniform / non-function parameters</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39024">Bug 39024</a> - [Pineview webgl] many webgl conformance cases crash the browser</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39083">Bug 39083</a> - [regression, bisected, r600g] Wrong rendering of Bubbles3D screensaver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39119">Bug 39119</a> - setting SQ_LDS_RESOURCE_MGMT register to zero in other applications muddles up font rendering permanently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39209">Bug 39209</a> - [bisected] Wrong display after "prefer native texture formats when possible" commit - part2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39219">Bug 39219</a> - libgl conflict with xbmc causes lock up on xbmc exit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39257">Bug 39257</a> - [bisected SNB]Mesa demos engine causes GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39487">Bug 39487</a> - [i965] brw_wm_surface_state.c:495: brw_update_renderbuffer_surface: Assertion `brw-&gt;has_surface_tile_offset || (tile_x == 0 &amp;&amp; tile_y == 0)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39515">Bug 39515</a> - FTBFS: libEGL depends on libgbm, but libEGL builds first</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39572">Bug 39572</a> - Cogs: GPU hang</li>
+
 </ul>


@@ -81,6 +338,11 @@ Building with SCons is an alternative.
 driver that implemented it.
 </ul>

+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-7.10..mesa-7.11
+</pre>

 </body>
 </html>
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -13,6 +13,7 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <UL>
+<LI><A HREF="relnotes-7.11.1.html">7.11.1 release notes</A>
 <LI><A HREF="relnotes-7.11.html">7.11 release notes</A>
 <LI><A HREF="relnotes-7.10.3.html">7.10.3 release notes</A>
 <LI><A HREF="relnotes-7.10.2.html">7.10.2 release notes</A>
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -67,7 +67,7 @@
 #elif defined(__CYGWIN__) && defined(USE_OPENGL32) /* use native windows opengl32 */
 #  define GLAPI extern
 #  define GLAPIENTRY __stdcall
-#elif defined(__GNUC__)	|| (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+#elif (defined(__GNUC__) && __GNUC__ >= 4) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
 #  define GLAPI __attribute__((visibility("default")))
 #  define GLAPIENTRY
 #endif /* WIN32 && !CYGWIN */
--- a/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
+++ b/src/egl/wayland/wayland-drm/protocol/wayland-drm.xml
@@ -1,5 +1,32 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <protocol name="drm">
+
+  <copyright>
+    Copyright © 2008-2011 Kristian Høgsberg
+    Copyright © 2010-2011 Intel Corporation
+
+    Permission to use, copy, modify, distribute, and sell this
+    software and its documentation for any purpose is hereby granted
+    without fee, provided that the above copyright notice appear in
+    all copies and that both that copyright notice and this permission
+    notice appear in supporting documentation, and that the name of
+    the copyright holders not be used in advertising or publicity
+    pertaining to distribution of the software without specific,
+    written prior permission.  The copyright holders make no
+    representations about the suitability of this software for any
+    purpose.  It is provided "as is" without express or implied
+    warranty.
+
+    THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+    SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+    FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+    SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+    AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+    ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+    THIS SOFTWARE.
+  </copyright>
+
  <!-- drm support. This object is created by the server and published
       using the display's global event. -->
  <interface name="wl_drm" version="1">
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -324,6 +324,13 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
      return NULL;
   
   buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
+
+   /* Empty the cache and try again. */
+   if (!buf->buffer) {
+      mgr->base.flush(&mgr->base);
+      buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
+   }
+
   if(!buf->buffer) {
      FREE(buf);
      return NULL;
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -119,8 +119,15 @@ util_format_s3tc_init(void)

   library = util_dl_open(DXTN_LIBNAME);
   if (!library) {
-      debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
-         "compression/decompression unavailable\n");
+      if (getenv("force_s3tc_enable") &&
+          !strcmp(getenv("force_s3tc_enable"), "true")) {
+         debug_printf("couldn't open " DXTN_LIBNAME ", enabling DXTn due to "
+            "force_s3tc_enable=true environment variable\n");
+         util_format_s3tc_enabled = TRUE;
+      } else {
+         debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn "
+            "compression/decompression unavailable\n");
+      }
      return;
   }

--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -547,6 +547,19 @@ util_bitcount(unsigned n)
 }


+/**
+ * Convert from little endian to CPU byte order.
+ */
+
+#ifdef PIPE_ARCH_BIG_ENDIAN
+#define util_le32_to_cpu(x) util_bswap32(x)
+#define util_le16_to_cpu(x) util_bswap16(x)
+#else
+#define util_le32_to_cpu(x) (x)
+#define util_le16_to_cpu(x) (x)
+#endif
+
+
 /**
 * Reverse byte order of a 32 bit word.
 */
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.c
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.c
@@ -49,7 +49,7 @@ struct u_vbuf_caps {
   unsigned fetch_dword_unaligned:1;
 };

-struct u_vbuf_mgr_elements {
+struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

@@ -67,7 +67,7 @@ struct u_vbuf_mgr_elements {
   boolean incompatible_layout;
 };

-struct u_vbuf_mgr_priv {
+struct u_vbuf_priv {
   struct u_vbuf_mgr b;
   struct u_vbuf_caps caps;
   struct pipe_context *pipe;
@@ -75,17 +75,15 @@ struct u_vbuf_mgr_priv {
   struct translate_cache *translate_cache;
   unsigned translate_vb_slot;

-   struct u_vbuf_mgr_elements *ve;
+   struct u_vbuf_elements *ve;
   void *saved_ve, *fallback_ve;
   boolean ve_binding_lock;

-   unsigned saved_buffer_offset[PIPE_MAX_ATTRIBS];
-
   boolean any_user_vbs;
   boolean incompatible_vb_layout;
 };

-static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr)
+static void u_vbuf_init_format_caps(struct u_vbuf_priv *mgr)
 {
   struct pipe_screen *screen = mgr->pipe->screen;

@@ -115,13 +113,13 @@ static void u_vbuf_mgr_init_format_caps(struct u_vbuf_mgr_priv *mgr)
 }

 struct u_vbuf_mgr *
-u_vbuf_mgr_create(struct pipe_context *pipe,
-                  unsigned upload_buffer_size,
-                  unsigned upload_buffer_alignment,
-                  unsigned upload_buffer_bind,
-                  enum u_fetch_alignment fetch_alignment)
+u_vbuf_create(struct pipe_context *pipe,
+              unsigned upload_buffer_size,
+              unsigned upload_buffer_alignment,
+              unsigned upload_buffer_bind,
+              enum u_fetch_alignment fetch_alignment)
 {
-   struct u_vbuf_mgr_priv *mgr = CALLOC_STRUCT(u_vbuf_mgr_priv);
+   struct u_vbuf_priv *mgr = CALLOC_STRUCT(u_vbuf_priv);

   mgr->pipe = pipe;
   mgr->translate_cache = translate_cache_create();
@@ -133,19 +131,21 @@ u_vbuf_mgr_create(struct pipe_context *pipe,
   mgr->caps.fetch_dword_unaligned =
         fetch_alignment == U_VERTEX_FETCH_BYTE_ALIGNED;

-   u_vbuf_mgr_init_format_caps(mgr);
+   u_vbuf_init_format_caps(mgr);

   return &mgr->b;
 }

-void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgrb)
+void u_vbuf_destroy(struct u_vbuf_mgr *mgrb)
 {
-   struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i;

-   for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) {
+   for (i = 0; i < mgr->b.nr_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL);
-      pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
+   }
+   for (i = 0; i < mgr->b.nr_real_vertex_buffers; i++) {
+      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
   }

   translate_cache_destroy(mgr->translate_cache);
@@ -154,8 +154,8 @@ void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgrb)
 }


-static enum u_vbuf_return_flags
-u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr,
+static void
+u_vbuf_translate_begin(struct u_vbuf_priv *mgr,
                       int min_index, int max_index)
 {
   struct translate_key key;
@@ -282,9 +282,9 @@ u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr,
   if (mgr->translate_vb_slot != ~0) {
      /* Setup the new vertex buffer. */
      pipe_resource_reference(
-            &mgr->b.real_vertex_buffer[mgr->translate_vb_slot], out_buffer);
-      mgr->b.vertex_buffer[mgr->translate_vb_slot].buffer_offset = out_offset;
-      mgr->b.vertex_buffer[mgr->translate_vb_slot].stride = key.output_stride;
+            &mgr->b.real_vertex_buffer[mgr->translate_vb_slot].buffer, out_buffer);
+      mgr->b.real_vertex_buffer[mgr->translate_vb_slot].buffer_offset = out_offset;
+      mgr->b.real_vertex_buffer[mgr->translate_vb_slot].stride = key.output_stride;

      /* Setup new vertex elements. */
      for (i = 0; i < mgr->ve->count; i++) {
@@ -311,11 +311,9 @@ u_vbuf_translate_begin(struct u_vbuf_mgr_priv *mgr,
   }

   pipe_resource_reference(&out_buffer, NULL);
-
-   return upload_flushed ? U_VBUF_UPLOAD_FLUSHED : 0;
 }

-static void u_vbuf_translate_end(struct u_vbuf_mgr_priv *mgr)
+static void u_vbuf_translate_end(struct u_vbuf_priv *mgr)
 {
   if (mgr->fallback_ve == NULL) {
      return;
@@ -328,7 +326,7 @@ static void u_vbuf_translate_end(struct u_vbuf_mgr_priv *mgr)
   mgr->fallback_ve = NULL;

   /* Delete the now-unused VBO. */
-   pipe_resource_reference(&mgr->b.real_vertex_buffer[mgr->translate_vb_slot],
+   pipe_resource_reference(&mgr->b.real_vertex_buffer[mgr->translate_vb_slot].buffer,
                           NULL);
   mgr->b.nr_real_vertex_buffers = mgr->b.nr_vertex_buffers;
 }
@@ -336,15 +334,15 @@ static void u_vbuf_translate_end(struct u_vbuf_mgr_priv *mgr)
 #define FORMAT_REPLACE(what, withwhat) \
    case PIPE_FORMAT_##what: format = PIPE_FORMAT_##withwhat; break

-struct u_vbuf_mgr_elements *
-u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *attribs,
-                                  struct pipe_vertex_element *native_attribs)
+struct u_vbuf_elements *
+u_vbuf_create_vertex_elements(struct u_vbuf_mgr *mgrb,
+                              unsigned count,
+                              const struct pipe_vertex_element *attribs,
+                              struct pipe_vertex_element *native_attribs)
 {
-   struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i;
-   struct u_vbuf_mgr_elements *ve = CALLOC_STRUCT(u_vbuf_mgr_elements);
+   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);

   ve->count = count;

@@ -440,11 +438,11 @@ u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgrb,
   return ve;
 }

-void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgrb,
-                                     void *cso,
-                                     struct u_vbuf_mgr_elements *ve)
+void u_vbuf_bind_vertex_elements(struct u_vbuf_mgr *mgrb,
+                                 void *cso,
+                                 struct u_vbuf_elements *ve)
 {
-   struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;

   if (!cso) {
      return;
@@ -456,17 +454,17 @@ void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgrb,
   }
 }

-void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr,
-                                        struct u_vbuf_mgr_elements *ve)
+void u_vbuf_destroy_vertex_elements(struct u_vbuf_mgr *mgr,
+                                    struct u_vbuf_elements *ve)
 {
   FREE(ve);
 }

-void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
-                                   unsigned count,
-                                   const struct pipe_vertex_buffer *bufs)
+void u_vbuf_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
+                               unsigned count,
+                               const struct pipe_vertex_buffer *bufs)
 {
-   struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   unsigned i;

   mgr->any_user_vbs = FALSE;
@@ -489,8 +487,13 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
      const struct pipe_vertex_buffer *vb = &bufs[i];

      pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, vb->buffer);
-      pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
-      mgr->saved_buffer_offset[i] = vb->buffer_offset;
+      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
+
+      mgr->b.real_vertex_buffer[i].buffer_offset =
+      mgr->b.vertex_buffer[i].buffer_offset = vb->buffer_offset;
+
+      mgr->b.real_vertex_buffer[i].stride =
+      mgr->b.vertex_buffer[i].stride = vb->stride;

      if (!vb->buffer) {
         continue;
@@ -501,79 +504,95 @@ void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgrb,
         continue;
      }

-      pipe_resource_reference(&mgr->b.real_vertex_buffer[i], vb->buffer);
+      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, vb->buffer);
   }

-   for (; i < mgr->b.nr_real_vertex_buffers; i++) {
+   for (i = count; i < mgr->b.nr_vertex_buffers; i++) {
      pipe_resource_reference(&mgr->b.vertex_buffer[i].buffer, NULL);
-      pipe_resource_reference(&mgr->b.real_vertex_buffer[i], NULL);
   }
-
-   memcpy(mgr->b.vertex_buffer, bufs,
-          sizeof(struct pipe_vertex_buffer) * count);
+   for (i = count; i < mgr->b.nr_real_vertex_buffers; i++) {
+      pipe_resource_reference(&mgr->b.real_vertex_buffer[i].buffer, NULL);
+   }

   mgr->b.nr_vertex_buffers = count;
   mgr->b.nr_real_vertex_buffers = count;
 }

-static enum u_vbuf_return_flags
-u_vbuf_upload_buffers(struct u_vbuf_mgr_priv *mgr,
+static void
+u_vbuf_upload_buffers(struct u_vbuf_priv *mgr,
                      int min_index, int max_index,
                      unsigned instance_count)
 {
-   unsigned i, nr = mgr->ve->count;
+   unsigned i;
   unsigned count = max_index + 1 - min_index;
-   boolean uploaded[PIPE_MAX_ATTRIBS] = {0};
-   enum u_vbuf_return_flags retval = 0;
+   unsigned nr_velems = mgr->ve->count;
+   unsigned nr_vbufs = mgr->b.nr_vertex_buffers;
+   unsigned start_offset[PIPE_MAX_ATTRIBS];
+   unsigned end_offset[PIPE_MAX_ATTRIBS] = {0};

-   for (i = 0; i < nr; i++) {
-      unsigned index = mgr->ve->ve[i].vertex_buffer_index;
+   /* Compute the byte range [start, end) each user buffer must upload. */
+   for (i = 0; i < nr_velems; i++) {
+      struct pipe_vertex_element *velem = &mgr->ve->ve[i];
+      unsigned index = velem->vertex_buffer_index;
+      unsigned instance_div = velem->instance_divisor;
      struct pipe_vertex_buffer *vb = &mgr->b.vertex_buffer[index];
+      unsigned first, size;

-      if (vb->buffer &&
-          u_vbuf_resource(vb->buffer)->user_ptr &&
-          !uploaded[index]) {
-         unsigned first, size;
-         boolean flushed;
-         unsigned instance_div = mgr->ve->ve[i].instance_divisor;
+      assert(vb->buffer);

-         if (instance_div) {
-            first = 0;
-            size = vb->stride *
-                   ((instance_count + instance_div - 1) / instance_div);
-         } else if (vb->stride) {
-            first = vb->stride * min_index;
-            size = vb->stride * count;
+      if (!u_vbuf_resource(vb->buffer)->user_ptr) {
+         continue;
+      }

-            /* Unusual case when stride is smaller than the format size.
-             * XXX This won't work with interleaved arrays. */
-            if (mgr->ve->native_format_size[i] > vb->stride)
-               size += mgr->ve->native_format_size[i] - vb->stride;
-         } else {
-            first = 0;
-            size = mgr->ve->native_format_size[i];
-         }
+      first = vb->buffer_offset + velem->src_offset;

-         u_upload_data(mgr->b.uploader, first, size,
-                       u_vbuf_resource(vb->buffer)->user_ptr + first,
-                       &vb->buffer_offset,
-                       &mgr->b.real_vertex_buffer[index],
-                       &flushed);
-
-         vb->buffer_offset -= first;
-
-         uploaded[index] = TRUE;
-         if (flushed)
-            retval |= U_VBUF_UPLOAD_FLUSHED;
+      if (!vb->stride) {
+         /* Constant attrib. */
+         size = mgr->ve->src_format_size[i];
+      } else if (instance_div) {
+         /* Per-instance attrib. */
+         unsigned ninst = (instance_count + instance_div - 1) / instance_div;
+         size = vb->stride * (ninst - 1) + mgr->ve->src_format_size[i];
      } else {
-         assert(mgr->b.real_vertex_buffer[index]);
+         /* Per-vertex attrib. */
+         first += vb->stride * min_index;
+         size = vb->stride * (count - 1) + mgr->ve->src_format_size[i];
+      }
+
+      /* Grow this buffer's range to cover the element (end == 0: unset). */
+      if (!end_offset[index]) {
+         start_offset[index] = first;
+         end_offset[index] = first + size;
+      } else {
+         if (first < start_offset[index])
+            start_offset[index] = first;
+         if (first + size > end_offset[index])
+            end_offset[index] = first + size;
      }
   }

-   return retval;
+   /* Upload the accumulated range of every user buffer. */
+   for (i = 0; i < nr_vbufs; i++) {
+      unsigned start, end = end_offset[i];
+      boolean flushed;
+
+      if (!end) {
+         continue;
+      }
+      start = start_offset[i];   /* only initialized when end != 0 */
+      assert(start < end);
+
+      u_upload_data(mgr->b.uploader, start, end - start,
+                    u_vbuf_resource(mgr->b.vertex_buffer[i].buffer)->user_ptr + start,
+                    &mgr->b.real_vertex_buffer[i].buffer_offset,
+                    &mgr->b.real_vertex_buffer[i].buffer,
+                    &flushed);
+
+      mgr->b.real_vertex_buffer[i].buffer_offset -= start;
+   }
 }

-static void u_vbuf_mgr_compute_max_index(struct u_vbuf_mgr_priv *mgr)
+static void u_vbuf_compute_max_index(struct u_vbuf_priv *mgr)
 {
   unsigned i, nr = mgr->ve->count;

@@ -582,25 +601,26 @@ static void u_vbuf_mgr_compute_max_index(struct u_vbuf_mgr_priv *mgr)
   for (i = 0; i < nr; i++) {
      struct pipe_vertex_buffer *vb =
            &mgr->b.vertex_buffer[mgr->ve->ve[i].vertex_buffer_index];
-      int unused;
-      unsigned max_index;
+      unsigned max_index, src_size, unused;

      if (!vb->buffer ||
          !vb->stride ||
-          u_vbuf_resource(vb->buffer)->user_ptr) {
+          u_vbuf_resource(vb->buffer)->user_ptr ||
+          mgr->ve->ve[i].instance_divisor) {
         continue;
      }

-      /* How many bytes is unused after the last vertex.
-       * width0 may be "count*stride - unused" and we have to compensate
-       * for that when dividing by stride. */
-      unused = vb->stride -
-               (mgr->ve->ve[i].src_offset + mgr->ve->src_format_size[i]);
+      src_size = mgr->ve->ve[i].src_offset + mgr->ve->src_format_size[i];

      /* If src_offset is greater than stride (which means it's a buffer
       * offset rather than a vertex offset)... */
-      if (unused < 0) {
+      if (src_size >= vb->stride) {
         unused = 0;
+      } else {
+         /* How many bytes are unused after the last vertex.
+          * width0 may be "count*stride - unused" and we have to compensate
+          * for that when dividing by stride. */
+         unused = vb->stride - src_size;
      }

      /* Compute the maximum index for this vertex element. */
@@ -613,50 +633,43 @@ static void u_vbuf_mgr_compute_max_index(struct u_vbuf_mgr_priv *mgr)
 }

 enum u_vbuf_return_flags
-u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgrb,
-                      const struct pipe_draw_info *info)
+u_vbuf_draw_begin(struct u_vbuf_mgr *mgrb,
+                  const struct pipe_draw_info *info)
 {
-   struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
+   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;
   int min_index, max_index;
-   enum u_vbuf_return_flags retval = 0;

-   u_vbuf_mgr_compute_max_index(mgr);
+   u_vbuf_compute_max_index(mgr);

-   min_index = info->min_index - info->index_bias;
-   if (info->max_index == ~0) {
-      max_index = mgr->b.max_index;
+   if (info->indexed) {
+      min_index = info->min_index;
+      if (info->max_index == ~0) {
+         max_index = mgr->b.max_index;
+      } else {
+         max_index = MIN2(info->max_index, mgr->b.max_index);
+      }
+      min_index += info->index_bias;
+      max_index += info->index_bias;
   } else {
-      max_index = MIN2(info->max_index - info->index_bias, mgr->b.max_index);
+      min_index = info->start;
+      max_index = info->start + info->count - 1;
   }

   /* Translate vertices with non-native layouts or formats. */
   if (mgr->incompatible_vb_layout || mgr->ve->incompatible_layout) {
-      retval |= u_vbuf_translate_begin(mgr, min_index, max_index);
-
-      if (mgr->fallback_ve) {
-         retval |= U_VBUF_BUFFERS_UPDATED;
-      }
+      u_vbuf_translate_begin(mgr, min_index, max_index);
   }

   /* Upload user buffers. */
   if (mgr->any_user_vbs) {
-      retval |= u_vbuf_upload_buffers(mgr, min_index, max_index,
-                                      info->instance_count);
-      retval |= U_VBUF_BUFFERS_UPDATED;
+      u_vbuf_upload_buffers(mgr, min_index, max_index, info->instance_count);
   }
-   return retval;
+   return mgr->any_user_vbs || mgr->fallback_ve ? U_VBUF_BUFFERS_UPDATED : 0;
 }

-void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgrb)
+void u_vbuf_draw_end(struct u_vbuf_mgr *mgrb)
 {
-   struct u_vbuf_mgr_priv *mgr = (struct u_vbuf_mgr_priv*)mgrb;
-   unsigned i;
-
-   /* buffer offsets were modified in u_vbuf_upload_buffers */
-   if (mgr->any_user_vbs) {
-      for (i = 0; i < mgr->b.nr_vertex_buffers; i++)
-         mgr->b.vertex_buffer[i].buffer_offset = mgr->saved_buffer_offset[i];
-   }
+   struct u_vbuf_priv *mgr = (struct u_vbuf_priv*)mgrb;

   if (mgr->fallback_ve) {
      u_vbuf_translate_end(mgr);
--- a/src/gallium/auxiliary/util/u_vbuf_mgr.h
+++ b/src/gallium/auxiliary/util/u_vbuf_mgr.h
@@ -49,7 +49,7 @@ struct u_vbuf_mgr {
   /* Contains only real vertex buffers.
    * Hardware drivers should use real_vertex_buffers[i]
    * instead of vertex_buffers[i].buffer. */
-   struct pipe_resource *real_vertex_buffer[PIPE_MAX_ATTRIBS];
+   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   int nr_real_vertex_buffers;

   /* Precomputed max_index for hardware vertex buffers. */
@@ -71,7 +71,7 @@ struct u_vbuf_resource {
 };

 /* Opaque type containing information about vertex elements for the manager. */
-struct u_vbuf_mgr_elements;
+struct u_vbuf_elements;

 enum u_fetch_alignment {
   U_VERTEX_FETCH_BYTE_ALIGNED,
@@ -79,42 +79,40 @@ enum u_fetch_alignment {
 };

 enum u_vbuf_return_flags {
-   U_VBUF_BUFFERS_UPDATED = 1,
-   U_VBUF_UPLOAD_FLUSHED = 2
+   U_VBUF_BUFFERS_UPDATED = 1
 };


 struct u_vbuf_mgr *
-u_vbuf_mgr_create(struct pipe_context *pipe,
-                  unsigned upload_buffer_size,
-                  unsigned upload_buffer_alignment,
-                  unsigned upload_buffer_bind,
-                  enum u_fetch_alignment fetch_alignment);
+u_vbuf_create(struct pipe_context *pipe,
+              unsigned upload_buffer_size,
+              unsigned upload_buffer_alignment,
+              unsigned upload_buffer_bind,
+              enum u_fetch_alignment fetch_alignment);

-void u_vbuf_mgr_destroy(struct u_vbuf_mgr *mgr);
+void u_vbuf_destroy(struct u_vbuf_mgr *mgr);

-struct u_vbuf_mgr_elements *
-u_vbuf_mgr_create_vertex_elements(struct u_vbuf_mgr *mgr,
-                                  unsigned count,
-                                  const struct pipe_vertex_element *attrs,
-                                  struct pipe_vertex_element *native_attrs);
+struct u_vbuf_elements *
+u_vbuf_create_vertex_elements(struct u_vbuf_mgr *mgr,
+                              unsigned count,
+                              const struct pipe_vertex_element *attrs,
+                              struct pipe_vertex_element *native_attrs);

-void u_vbuf_mgr_bind_vertex_elements(struct u_vbuf_mgr *mgr,
-                                     void *cso,
-                                     struct u_vbuf_mgr_elements *ve);
+void u_vbuf_bind_vertex_elements(struct u_vbuf_mgr *mgr,
+                                 void *cso,
+                                 struct u_vbuf_elements *ve);

-void u_vbuf_mgr_destroy_vertex_elements(struct u_vbuf_mgr *mgr,
-                                        struct u_vbuf_mgr_elements *ve);
+void u_vbuf_destroy_vertex_elements(struct u_vbuf_mgr *mgr,
+                                    struct u_vbuf_elements *ve);

-void u_vbuf_mgr_set_vertex_buffers(struct u_vbuf_mgr *mgr,
-                                   unsigned count,
-                                   const struct pipe_vertex_buffer *bufs);
+void u_vbuf_set_vertex_buffers(struct u_vbuf_mgr *mgr,
+                               unsigned count,
+                               const struct pipe_vertex_buffer *bufs);

-enum u_vbuf_return_flags
-u_vbuf_mgr_draw_begin(struct u_vbuf_mgr *mgr,
-                      const struct pipe_draw_info *info);
+enum u_vbuf_return_flags u_vbuf_draw_begin(struct u_vbuf_mgr *mgr,
+                                           const struct pipe_draw_info *info);

-void u_vbuf_mgr_draw_end(struct u_vbuf_mgr *mgr);
+void u_vbuf_draw_end(struct u_vbuf_mgr *mgr);


 static INLINE struct u_vbuf_resource *u_vbuf_resource(struct pipe_resource *r)
--- a/src/gallium/drivers/i915/i915_state_dynamic.c
+++ b/src/gallium/drivers/i915/i915_state_dynamic.c
@@ -268,8 +268,8 @@ static void upload_SCISSOR_RECT(struct i915_context *i915)
 {
   unsigned x1 = i915->scissor.minx;
   unsigned y1 = i915->scissor.miny;
-   unsigned x2 = i915->scissor.maxx;
-   unsigned y2 = i915->scissor.maxy;
+   unsigned x2 = i915->scissor.maxx - 1;
+   unsigned y2 = i915->scissor.maxy - 1;
   unsigned sc[3];

   sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD;
--- a/src/gallium/drivers/nouveau/nouveau_fence.c
+++ b/src/gallium/drivers/nouveau/nouveau_fence.c
@@ -86,14 +86,10 @@ nouveau_fence_emit(struct nouveau_fence *fence)
 {
   struct nouveau_screen *screen = fence->screen;

-   fence->sequence = ++screen->fence.sequence;
-
   assert(fence->state == NOUVEAU_FENCE_STATE_AVAILABLE);

   /* set this now, so that if fence.emit triggers a flush we don't recurse */
-   fence->state = NOUVEAU_FENCE_STATE_EMITTED;
-
-   screen->fence.emit(&screen->base, fence->sequence);
+   fence->state = NOUVEAU_FENCE_STATE_EMITTING;

   ++fence->ref;

@@ -103,6 +99,11 @@ nouveau_fence_emit(struct nouveau_fence *fence)
      screen->fence.head = fence;

   screen->fence.tail = fence;
+
+   screen->fence.emit(&screen->base, &fence->sequence);
+
+   assert(fence->state == NOUVEAU_FENCE_STATE_EMITTING);
+   fence->state = NOUVEAU_FENCE_STATE_EMITTED;
 }

 void
@@ -162,7 +163,8 @@ nouveau_fence_update(struct nouveau_screen *screen, boolean flushed)

   if (flushed) {
      for (fence = next; fence; fence = fence->next)
-         fence->state = NOUVEAU_FENCE_STATE_FLUSHED;
+         if (fence->state == NOUVEAU_FENCE_STATE_EMITTED)
+            fence->state = NOUVEAU_FENCE_STATE_FLUSHED;
   }
 }

@@ -185,6 +187,9 @@ nouveau_fence_wait(struct nouveau_fence *fence)
   struct nouveau_screen *screen = fence->screen;
   uint32_t spins = 0;

+   /* A fence must not be waited on while it is still being emitted (e.g. from a flush_notify handler). */
+   assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
+
   if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
      nouveau_fence_emit(fence);

@@ -216,8 +221,10 @@ nouveau_fence_wait(struct nouveau_fence *fence)
 void
 nouveau_fence_next(struct nouveau_screen *screen)
 {
-   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTED)
+   if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
      nouveau_fence_emit(screen->fence.current);

+   nouveau_fence_ref(NULL, &screen->fence.current);
+
   nouveau_fence_new(screen, &screen->fence.current, FALSE);
 }
--- a/src/gallium/drivers/nouveau/nouveau_fence.h
+++ b/src/gallium/drivers/nouveau/nouveau_fence.h
@@ -6,9 +6,10 @@
 #include "util/u_double_list.h"

 #define NOUVEAU_FENCE_STATE_AVAILABLE 0
-#define NOUVEAU_FENCE_STATE_EMITTED   1
-#define NOUVEAU_FENCE_STATE_FLUSHED   2
-#define NOUVEAU_FENCE_STATE_SIGNALLED 3
+#define NOUVEAU_FENCE_STATE_EMITTING  1
+#define NOUVEAU_FENCE_STATE_EMITTED   2
+#define NOUVEAU_FENCE_STATE_FLUSHED   3
+#define NOUVEAU_FENCE_STATE_SIGNALLED 4

 struct nouveau_fence_work {
   struct list_head list;
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -24,7 +24,7 @@ struct nouveau_screen {
 		struct nouveau_fence *current;
 		u32 sequence;
 		u32 sequence_ack;
-		void (*emit)(struct pipe_screen *, u32 sequence);
+		void (*emit)(struct pipe_screen *, u32 *sequence);
 		u32  (*update)(struct pipe_screen *);
 	} fence;

--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -244,16 +244,20 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 }

 static void
-nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 sequence)
+nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
 {
   struct nv50_screen *screen = nv50_screen(pscreen);
   struct nouveau_channel *chan = screen->base.channel;

   MARK_RING (chan, 5, 2);
+
+   /* we need to do it after possible flush in MARK_RING */
+   *sequence = ++screen->base.fence.sequence;
+
   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
   OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
   OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
-   OUT_RING  (chan, sequence);
+   OUT_RING  (chan, *sequence);
   OUT_RING  (chan, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |
                    NV50_3D_QUERY_GET_UNK4 |
                    NV50_3D_QUERY_GET_UNIT_CROP |
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -308,16 +308,20 @@ nvc0_magic_3d_init(struct nouveau_channel *chan)
 }

 static void
-nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 sequence)
+nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
 {
   struct nvc0_screen *screen = nvc0_screen(pscreen);
   struct nouveau_channel *chan = screen->base.channel;

   MARK_RING (chan, 5, 2);
+
+   /* we need to do it after possible flush in MARK_RING */
+   *sequence = ++screen->base.fence.sequence;
+
   BEGIN_RING(chan, RING_3D(QUERY_ADDRESS_HIGH), 4);
   OUT_RELOCh(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
   OUT_RELOCl(chan, screen->fence.bo, 0, NOUVEAU_BO_WR);
-   OUT_RING  (chan, sequence);
+   OUT_RING  (chan, *sequence);
   OUT_RING  (chan, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
              (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
 }
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -106,7 +106,7 @@ static void r300_destroy_context(struct pipe_context* context)
        draw_destroy(r300->draw);

    if (r300->vbuf_mgr)
-        u_vbuf_mgr_destroy(r300->vbuf_mgr);
+        u_vbuf_destroy(r300->vbuf_mgr);

    /* XXX: This function assumes r300->query_list was initialized */
    r300_release_referenced_objects(r300);
@@ -437,7 +437,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
    r300_init_state_functions(r300);
    r300_init_resource_functions(r300);

-    r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16,
+    r300->vbuf_mgr = u_vbuf_create(&r300->context, 1024 * 1024, 16,
                                       PIPE_BIND_VERTEX_BUFFER |
                                       PIPE_BIND_INDEX_BUFFER,
                                       U_VERTEX_FETCH_DWORD_ALIGNED);
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -425,7 +425,7 @@ struct r300_vertex_element_state {
    struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
    unsigned format_size[PIPE_MAX_ATTRIBS];

-    struct u_vbuf_mgr_elements *vmgr_elements;
+    struct u_vbuf_elements *vmgr_elements;

    /* The size of the vertex, in dwords. */
    unsigned vertex_size_dwords;
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -822,8 +822,7 @@ void r300_emit_textures_state(struct r300_context *r300,
 void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
                             boolean indexed, int instance_id)
 {
-    struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer;
-    struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer;
+    struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->real_vertex_buffer;
    struct pipe_vertex_element *velem = r300->velems->velem;
    struct r300_resource *buf;
    int i;
@@ -861,7 +860,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
        }

        for (i = 0; i < vertex_array_count; i++) {
-            buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+            buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer);
            OUT_CS_RELOC(buf);
        }
    } else {
@@ -913,7 +912,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
        }

        for (i = 0; i < vertex_array_count; i++) {
-            buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
+            buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer);
            OUT_CS_RELOC(buf);
        }
    }
@@ -1221,15 +1220,17 @@ validate:
                                r300_resource(r300->vbo)->domain, 0);
    /* ...vertex buffers for HWTCL path... */
    if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) {
-        struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer;
-        struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer +
+        struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->real_vertex_buffer;
+        struct pipe_vertex_buffer *last = r300->vbuf_mgr->real_vertex_buffer +
                                      r300->vbuf_mgr->nr_real_vertex_buffers;
-        for (; buf != last; buf++) {
-            if (!*buf)
+        struct pipe_resource *buf;
+        for (; vbuf != last; vbuf++) {
+            buf = vbuf->buffer;
+            if (!buf)
                continue;

-            r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf,
-                                    r300_resource(*buf)->domain, 0);
+            r300->rws->cs_add_reloc(r300->cs, r300_resource(buf)->cs_buf,
+                                    r300_resource(buf)->domain, 0);
        }
    }
    /* ...and index buffer for HWTCL path. */
@@ -1237,13 +1238,12 @@ validate:
        r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf,
                                r300_resource(index_buffer)->domain, 0);

-    /* Now do the validation. */
+    /* Now do the validation (flush is called inside cs_validate on failure). */
    if (!r300->rws->cs_validate(r300->cs)) {
        /* Ooops, an infinite loop, give up. */
        if (flushed)
            return FALSE;

-        r300_flush(&r300->context, RADEON_FLUSH_ASYNC, NULL);
        flushed = TRUE;
        goto validate;
    }
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -138,7 +138,8 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
    /* Sum up the results. */
    temp = 0;
    for (i = 0; i < q->num_results; i++) {
-        temp += *map;
+        /* Convert little endian values written by GPU to CPU byte order */
+        temp += util_le32_to_cpu(*map);
        map++;
    }

--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -146,7 +146,7 @@ static void r300_emit_draw_init(struct r300_context *r300, unsigned mode,
 static void r300_split_index_bias(struct r300_context *r300, int index_bias,
                                  int *buffer_offset, int *index_offset)
 {
-    struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->vertex_buffer;
+    struct pipe_vertex_buffer *vb, *vbufs = r300->vbuf_mgr->real_vertex_buffer;
    struct pipe_vertex_element *velem = r300->velems->velem;
    unsigned i, size;
    int max_neg_bias;
@@ -338,7 +338,7 @@ static boolean immd_is_good_idea(struct r300_context *r300,
        vbi = velem->vertex_buffer_index;

        if (!checked[vbi]) {
-            buf = r300->vbuf_mgr->real_vertex_buffer[vbi];
+            buf = r300->vbuf_mgr->real_vertex_buffer[vbi].buffer;

            if ((r300_resource(buf)->domain != RADEON_DOMAIN_GTT)) {
                return FALSE;
@@ -389,13 +389,13 @@ static void r300_draw_arrays_immediate(struct r300_context *r300,
        velem = &r300->velems->velem[i];
        size[i] = r300->velems->format_size[i] / 4;
        vbi = velem->vertex_buffer_index;
-        vbuf = &r300->vbuf_mgr->vertex_buffer[vbi];
+        vbuf = &r300->vbuf_mgr->real_vertex_buffer[vbi];
        stride[i] = vbuf->stride / 4;

        /* Map the buffer. */
        if (!map[vbi]) {
            map[vbi] = (uint32_t*)r300->rws->buffer_map(
-                r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf,
+                r300_resource(vbuf->buffer)->buf,
                r300->cs, PIPE_TRANSFER_READ | PIPE_TRANSFER_UNSYNCHRONIZED);
            map[vbi] += (vbuf->buffer_offset / 4) + stride[i] * info->start;
        }
@@ -423,7 +423,7 @@ static void r300_draw_arrays_immediate(struct r300_context *r300,
        vbi = r300->velems->velem[i].vertex_buffer_index;

        if (map[vbi]) {
-            r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi])->buf);
+            r300->rws->buffer_unmap(r300_resource(r300->vbuf_mgr->real_vertex_buffer[vbi].buffer)->buf);
            map[vbi] = NULL;
        }
    }
@@ -779,7 +779,7 @@ static void r300_draw_vbo(struct pipe_context* pipe,
    r300_update_derived_state(r300);

    /* Start the vbuf manager and update buffers if needed. */
-    if (u_vbuf_mgr_draw_begin(r300->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) {
+    if (u_vbuf_draw_begin(r300->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) {
        r300->vertex_arrays_dirty = TRUE;
    }

@@ -810,7 +810,7 @@ static void r300_draw_vbo(struct pipe_context* pipe,
        }
    }

-    u_vbuf_mgr_draw_end(r300->vbuf_mgr);
+    u_vbuf_draw_end(r300->vbuf_mgr);
 }

 /****************************************************************************
@@ -825,7 +825,6 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
    struct r300_context* r300 = r300_context(pipe);
    struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
    struct pipe_transfer *ib_transfer = NULL;
-    unsigned count = info->count;
    int i;
    void *indices = NULL;
    boolean indexed = info->indexed && r300->index_buffer.buffer;
@@ -834,10 +833,6 @@ static void r300_swtcl_draw_vbo(struct pipe_context* pipe,
        return;
    }

-    if (!u_trim_pipe_prim(info->mode, &count)) {
-        return;
-    }
-
    r300_update_derived_state(r300);

    r300_reserve_cs_dwords(r300,
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1583,7 +1583,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
        count = 1;
    }

-    u_vbuf_mgr_set_vertex_buffers(r300->vbuf_mgr, count, buffers);
+    u_vbuf_set_vertex_buffers(r300->vbuf_mgr, count, buffers);

    if (r300->screen->caps.has_tcl) {
        /* HW TCL. */
@@ -1691,7 +1691,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,

    velems->count = count;
    velems->vmgr_elements =
-        u_vbuf_mgr_create_vertex_elements(r300->vbuf_mgr, count, attribs,
+        u_vbuf_create_vertex_elements(r300->vbuf_mgr, count, attribs,
                                          velems->velem);

    if (r300_screen(pipe->screen)->caps.has_tcl) {
@@ -1721,7 +1721,7 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,

    r300->velems = velems;

-    u_vbuf_mgr_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements);
+    u_vbuf_bind_vertex_elements(r300->vbuf_mgr, state, velems->vmgr_elements);

    if (r300->draw) {
        draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
@@ -1738,7 +1738,7 @@ static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *s
    struct r300_context *r300 = r300_context(pipe);
    struct r300_vertex_element_state *velems = state;

-    u_vbuf_mgr_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements);
+    u_vbuf_destroy_vertex_elements(r300->vbuf_mgr, velems->vmgr_elements);
    FREE(state);
 }

--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -824,10 +824,10 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf)
    }
 }

-boolean r300_resource_set_properties(struct pipe_screen *screen,
-                                     struct pipe_resource *tex,
-                                     unsigned offset,
-                                     const struct pipe_resource *new_properties)
+void r300_resource_set_properties(struct pipe_screen *screen,
+                                  struct pipe_resource *tex,
+                                  unsigned offset,
+                                  const struct pipe_resource *new_properties)
 {
    struct r300_screen *rscreen = r300_screen(screen);
    struct r300_resource *res = r300_resource(tex);
@@ -837,14 +837,9 @@ boolean r300_resource_set_properties(struct pipe_screen *screen,
        util_format_short_name(tex->format),
        util_format_short_name(new_properties->format));

-    if (!r300_texture_desc_init(rscreen, res, new_properties)) {
-        fprintf(stderr, "r300: ERROR: Cannot set texture properties.\n");
-        return FALSE;
-    }
+    r300_texture_desc_init(rscreen, res, new_properties);
    res->tex_offset = offset;
    r300_texture_setup_format_state(rscreen, res, 0, &res->tx_format);
-
-    return TRUE;
 }

 static void r300_texture_destroy(struct pipe_screen *screen,
@@ -915,12 +910,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
                  RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT;
    tex->buf_size = max_buffer_size;

-    if (!r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base)) {
-        if (buffer)
-            pb_reference(&buffer, NULL);
-        FREE(tex);
-        return NULL;
-    }
+    r300_resource_set_properties(&rscreen->screen, &tex->b.b.b, 0, base);

    /* Create the backing buffer if needed. */
    if (!buffer) {
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -50,10 +50,10 @@ uint32_t r300_translate_texformat(enum pipe_format format,

 uint32_t r500_tx_format_msb_bit(enum pipe_format format);

-boolean r300_resource_set_properties(struct pipe_screen *screen,
-                                     struct pipe_resource *tex,
-                                     unsigned offset,
-                                     const struct pipe_resource *new_properties);
+void r300_resource_set_properties(struct pipe_screen *screen,
+                                  struct pipe_resource *tex,
+                                  unsigned offset,
+                                  const struct pipe_resource *new_properties);

 boolean r300_is_colorbuffer_format_supported(enum pipe_format format);

--- a/src/gallium/drivers/r300/r300_texture_desc.c
+++ b/src/gallium/drivers/r300/r300_texture_desc.c
@@ -473,9 +473,9 @@ static void r300_tex_print_info(struct r300_resource *tex,
            util_format_short_name(tex->b.b.b.format));
 }

-boolean r300_texture_desc_init(struct r300_screen *rscreen,
-                               struct r300_resource *tex,
-                               const struct pipe_resource *base)
+void r300_texture_desc_init(struct r300_screen *rscreen,
+                            struct r300_resource *tex,
+                            const struct pipe_resource *base)
 {
    tex->b.b.b.target = base->target;
    tex->b.b.b.format = base->format;
@@ -518,11 +518,15 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen,
    if (tex->buf_size) {
        /* Make sure the buffer we got is large enough. */
        if (tex->tex.size_in_bytes > tex->buf_size) {
-            fprintf(stderr, "r300: texture_desc_init: The buffer is not "
-                            "large enough. Got: %i, Need: %i, Info:\n",
-                            tex->buf_size, tex->tex.size_in_bytes);
+            fprintf(stderr,
+                "r300: I got a pre-allocated buffer to use it as a texture "
+                "storage, but the buffer is too small. I'll use the buffer "
+                "anyway, because I can't crash here, but it's dangerous. "
+                "This can be a DDX bug. Got: %iB, Need: %iB, Info:\n",
+                tex->buf_size, tex->tex.size_in_bytes);
            r300_tex_print_info(tex, "texture_desc_init");
-            return FALSE;
+            /* Ooops, what now. Apps will break if we fail this,
+             * so just pretend everything's okay. */
        }

        tex->tex.buffer_size_in_bytes = tex->buf_size;
@@ -532,8 +536,6 @@ boolean r300_texture_desc_init(struct r300_screen *rscreen,

    if (SCREEN_DBG_ON(rscreen, DBG_TEX))
        r300_tex_print_info(tex, "texture_desc_init");
-
-    return TRUE;
 }

 unsigned r300_texture_get_offset(struct r300_resource *tex,
--- a/src/gallium/drivers/r300/r300_texture_desc.h
+++ b/src/gallium/drivers/r300/r300_texture_desc.h
@@ -43,9 +43,9 @@ unsigned r300_get_pixel_alignment(enum pipe_format format,
                                  enum radeon_bo_layout macrotile,
                                  enum r300_dim dim, boolean is_rs690);

-boolean r300_texture_desc_init(struct r300_screen *rscreen,
-                               struct r300_resource *tex,
-                               const struct pipe_resource *base);
+void r300_texture_desc_init(struct r300_screen *rscreen,
+                            struct r300_resource *tex,
+                            const struct pipe_resource *base);

 unsigned r300_texture_get_offset(struct r300_resource *tex,
                                 unsigned level, unsigned layer);
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -327,6 +327,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		return V_028C70_SWAP_STD;

 	case PIPE_FORMAT_R16_UNORM:
+	case PIPE_FORMAT_R16_FLOAT:
 		return V_028C70_SWAP_STD;

 	/* 32-bit buffers. */
@@ -430,6 +431,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_R16_UNORM:
 		return V_028C70_COLOR_16;

+	case PIPE_FORMAT_R16_FLOAT:
+		return V_028C70_COLOR_16_FLOAT;
+
 	/* 32-bit buffers. */
 	case PIPE_FORMAT_A8B8G8R8_SRGB:
 	case PIPE_FORMAT_A8B8G8R8_UNORM:
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -809,9 +809,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 	struct r600_resource_texture *rtex;
 	struct r600_resource *rbuffer;
 	struct r600_surface *surf;
-	unsigned level;
-	unsigned pitch, slice, format, stencil_format;
-	unsigned offset;
+	unsigned level, pitch, slice, format, stencil_format, offset, array_mode;

 	if (state->zsbuf == NULL)
 		return;
@@ -823,9 +821,13 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state

 	rbuffer = &rtex->resource;

-	/* XXX quite sure for dx10+ hw don't need any offset hacks */
 	offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
 					 level, state->zsbuf->u.tex.first_layer);
+
+	/* XXX remove this once tiling is properly supported */
+	array_mode = rtex->array_mode[level] ? rtex->array_mode[level] :
+					       V_028C70_ARRAY_1D_TILED_THIN1;
+
 	pitch = rtex->pitch_in_blocks[level] / 8 - 1;
 	slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
 	format = r600_translate_dbformat(state->zsbuf->texture->format);
@@ -851,7 +853,7 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 				S_028044_FORMAT(stencil_format), 0xFFFFFFFF, rbuffer->bo);

 	r600_pipe_state_add_reg(rstate, R_028040_DB_Z_INFO,
-				S_028040_ARRAY_MODE(rtex->array_mode[level]) | S_028040_FORMAT(format),
+				S_028040_ARRAY_MODE(array_mode) | S_028040_FORMAT(format),
 				0xFFFFFFFF, rbuffer->bo);
 	r600_pipe_state_add_reg(rstate, R_028058_DB_DEPTH_SIZE,
 				S_028058_PITCH_TILE_MAX(pitch),
@@ -1726,7 +1728,7 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 	struct r600_pipe_state *rstate = &shader->rstate;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned spi_vs_out_id[10];
-	unsigned i, tmp;
+	unsigned i, tmp, nparams;

 	/* clear previous register */
 	rstate->nregs = 0;
@@ -1745,9 +1747,17 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
 					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
 	}

+	/* Certain attributes (position, psize, etc.) don't count as params.
+	 * VS is required to export at least one param and r600_shader_from_tgsi()
+	 * takes care of adding a dummy export.
+	 */
+	nparams = rshader->noutput - rshader->npos;
+	if (nparams < 1)
+		nparams = 1;
+
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
+			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
 			0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028860_SQ_PGM_RESOURCES_VS,
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -94,6 +94,8 @@ struct r600_tiling_info *r600_get_tiling_info(struct radeon *radeon);
 unsigned r600_get_clock_crystal_freq(struct radeon *radeon);
 unsigned r600_get_minor_version(struct radeon *radeon);
 unsigned r600_get_num_backends(struct radeon *radeon);
+unsigned r600_get_num_tile_pipes(struct radeon *radeon);
+unsigned r600_get_backend_map(struct radeon *radeon);

 /* r600_bo.c */
 struct r600_bo;
@@ -261,6 +263,7 @@ struct r600_context {
 	u32			*pm4;
 	struct list_head	query_list;
 	unsigned		num_query_running;
+	unsigned		backend_mask;
 	struct list_head	fenced_bo;
 	unsigned                max_db; /* for OQ */
 	unsigned                num_dest_buffers;
@@ -282,6 +285,7 @@ struct r600_draw {
 	struct r600_bo		*indices;
 };

+void r600_get_backend_mask(struct r600_context *ctx);
 int r600_context_init(struct r600_context *ctx, struct radeon *radeon);
 void r600_context_fini(struct r600_context *ctx);
 void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state);
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -732,15 +732,19 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
 {
 	struct alu_bank_swizzle bs;
 	int bank_swizzle[5];
-	int i, r = 0, forced = 0;
+	int i, r = 0, forced = 1;
 	boolean scalar_only = bc->chiprev == CHIPREV_CAYMAN ? false : true;
 	int max_slots = bc->chiprev == CHIPREV_CAYMAN ? 4 : 5;

 	for (i = 0; i < max_slots; i++) {
-		if (slots[i] && slots[i]->bank_swizzle_force) {
-			slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
-			forced = 1;
+		if (slots[i]) {
+			if (slots[i]->bank_swizzle_force) {
+				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+			} else {
+				forced = 0;
+			}
 		}
+
 		if (i < 4 && slots[i])
 			scalar_only = false;
 	}
@@ -750,7 +754,11 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
 	/* Just check every possible combination of bank swizzle.
 	 * Not very efficent, but works on the first try in most of the cases. */
 	for (i = 0; i < 4; i++)
-		bank_swizzle[i] = SQ_ALU_VEC_012;
+		if (!slots[i] || !slots[i]->bank_swizzle_force)
+			bank_swizzle[i] = SQ_ALU_VEC_012;
+		else
+			bank_swizzle[i] = slots[i]->bank_swizzle;
+
 	bank_swizzle[4] = SQ_ALU_SCL_210;
 	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {

@@ -787,11 +795,13 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc,
 			bank_swizzle[4]++;
 		} else {
 			for (i = 0; i < max_slots; i++) {
-				bank_swizzle[i]++;
-				if (bank_swizzle[i] <= SQ_ALU_VEC_210)
-					break;
-				else
-					bank_swizzle[i] = SQ_ALU_VEC_012;
+				if (!slots[i] || !slots[i]->bank_swizzle_force) {
+					bank_swizzle[i]++;
+					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+						break;
+					else
+						bank_swizzle[i] = SQ_ALU_VEC_012;
+				}
 			}
 		}
 	}
@@ -813,7 +823,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc *bc,
 		return r;

 	for (i = 0; i < max_slots; ++i) {
-		if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+		if (prev[i] && (prev[i]->dst.write || prev[i]->is_op3) && !prev[i]->dst.rel) {
 			gpr[i] = prev[i]->dst.sel;
 			/* cube writes more than PV.X */
 			if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -171,7 +171,7 @@ static void r600_destroy_context(struct pipe_context *context)
 		free(rctx->states[i]);
 	}

-	u_vbuf_mgr_destroy(rctx->vbuf_mgr);
+	u_vbuf_destroy(rctx->vbuf_mgr);
 	util_slab_destroy(&rctx->pool_transfers);

 	if (rctx->fences.bo) {
@@ -274,7 +274,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 			 sizeof(struct pipe_transfer), 64,
 			 UTIL_SLAB_SINGLETHREADED);

-	rctx->vbuf_mgr = u_vbuf_mgr_create(&rctx->context, 1024 * 1024, 256,
+	rctx->vbuf_mgr = u_vbuf_create(&rctx->context, 1024 * 1024, 256,
 					   PIPE_BIND_VERTEX_BUFFER |
 					   PIPE_BIND_INDEX_BUFFER |
 					   PIPE_BIND_CONSTANT_BUFFER,
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -110,7 +110,7 @@ struct r600_vertex_element
 {
 	unsigned			count;
 	struct pipe_vertex_element	elements[PIPE_MAX_ATTRIBS];
-	struct u_vbuf_mgr_elements	*vmgr_elements;
+	struct u_vbuf_elements	*vmgr_elements;
 	struct r600_bo			*fetch_shader;
 	unsigned			fs_size;
 	struct r600_pipe_state		rstate;
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -333,6 +333,12 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 		ctx->shader->output[i].sid = d->Semantic.Index;
 		ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + i;
 		ctx->shader->output[i].interpolate = d->Declaration.Interpolate;
+		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+			/* these don't count as vertex param exports */
+			if ((ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION) ||
+			    (ctx->shader->output[i].name == TGSI_SEMANTIC_PSIZE))
+				ctx->shader->npos++;
+		}
 		break;
 	case TGSI_FILE_CONSTANT:
 	case TGSI_FILE_TEMPORARY:
@@ -2929,25 +2935,34 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)

 static int pops(struct r600_shader_ctx *ctx, int pops)
 {
-	int alu_pop = 3;
-	if (ctx->bc->cf_last) {
-		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
-			alu_pop = 0;
-		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
-			alu_pop = 1;
+	unsigned force_pop = ctx->bc->force_add_cf;
+
+	if (!force_pop) {
+		int alu_pop = 3;
+		if (ctx->bc->cf_last) {
+			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+				alu_pop = 0;
+			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+				alu_pop = 1;
+		}
+		alu_pop += pops;
+		if (alu_pop == 1) {
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+			ctx->bc->force_add_cf = 1;
+		} else if (alu_pop == 2) {
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+			ctx->bc->force_add_cf = 1;
+		} else {
+			force_pop = 1;
+		}
 	}
-	alu_pop += pops;
-	if (alu_pop == 1) {
-		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
-		ctx->bc->force_add_cf = 1;
-	} else if (alu_pop == 2) {
-		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
-		ctx->bc->force_add_cf = 1;
-	} else {
+
+	if (force_pop) {
 		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
 		ctx->bc->cf_last->pop_count = pops;
 		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
 	}
+
 	return 0;
 }

--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -40,6 +40,7 @@ struct r600_shader {
 	struct r600_bc		bc;
 	unsigned		ninput;
 	unsigned		noutput;
+	unsigned		npos;
 	unsigned		nlds;
 	struct r600_shader_io	input[32];
 	struct r600_shader_io	output[32];
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -886,9 +886,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	struct r600_resource_texture *rtex;
 	struct r600_resource *rbuffer;
 	struct r600_surface *surf;
-	unsigned level;
-	unsigned pitch, slice, format;
-	unsigned offset;
+	unsigned level, pitch, slice, format, offset, array_mode;

 	if (state->zsbuf == NULL)
 		return;
@@ -900,6 +898,10 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta

 	rbuffer = &rtex->resource;

+	/* XXX remove this once tiling is properly supported */
+	array_mode = rtex->array_mode[level] ? rtex->array_mode[level] :
+					       V_0280A0_ARRAY_1D_TILED_THIN1;
+
 	/* XXX quite sure for dx10+ hw don't need any offset hacks */
 	offset = r600_texture_get_offset((struct r600_resource_texture *)state->zsbuf->texture,
 					 level, state->zsbuf->u.tex.first_layer);
@@ -914,7 +916,7 @@ static void r600_db(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 				0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028004_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_028010_DB_DEPTH_INFO,
-				S_028010_ARRAY_MODE(rtex->array_mode[level]) | S_028010_FORMAT(format),
+				S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format),
 				0xFFFFFFFF, rbuffer->bo);
 	r600_pipe_state_add_reg(rstate, R_028D34_DB_PREFETCH_LIMIT,
 				(surf->aligned_height / 8) - 1, 0xFFFFFFFF, NULL);
@@ -1481,7 +1483,7 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 	struct r600_pipe_state *rstate = &shader->rstate;
 	struct r600_shader *rshader = &shader->shader;
 	unsigned spi_vs_out_id[10];
-	unsigned i, tmp;
+	unsigned i, tmp, nparams;

 	/* clear previous register */
 	rstate->nregs = 0;
@@ -1503,9 +1505,17 @@ void r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader *shad
 					spi_vs_out_id[i], 0xFFFFFFFF, NULL);
 	}

+	/* Certain attributes (position, psize, etc.) don't count as params.
+	 * VS is required to export at least one param and r600_shader_from_tgsi()
+	 * takes care of adding a dummy export.
+	 */
+	nparams = rshader->noutput - rshader->npos;
+	if (nparams < 1)
+		nparams = 1;
+
 	r600_pipe_state_add_reg(rstate,
 			R_0286C4_SPI_VS_OUT_CONFIG,
-			S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2),
+			S_0286C4_VS_EXPORT_COUNT(nparams - 1),
 			0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 			R_028868_SQ_PGM_RESOURCES_VS,
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -162,7 +162,7 @@ void r600_bind_vertex_elements(struct pipe_context *ctx, void *state)

 	rctx->vertex_elements = v;
 	if (v) {
-		u_vbuf_mgr_bind_vertex_elements(rctx->vbuf_mgr, state,
+		u_vbuf_bind_vertex_elements(rctx->vbuf_mgr, state,
 						v->vmgr_elements);

 		rctx->states[v->rstate.id] = &v->rstate;
@@ -182,7 +182,7 @@ void r600_delete_vertex_element(struct pipe_context *ctx, void *state)
 		rctx->vertex_elements = NULL;

 	r600_bo_reference(rctx->radeon, &v->fetch_shader, NULL);
-	u_vbuf_mgr_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
+	u_vbuf_destroy_vertex_elements(rctx->vbuf_mgr, v->vmgr_elements);
 	FREE(state);
 }

@@ -227,7 +227,7 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
 		}
 	}

-	u_vbuf_mgr_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
+	u_vbuf_set_vertex_buffers(rctx->vbuf_mgr, count, buffers);
 }

 void *r600_create_vertex_elements(struct pipe_context *ctx,
@@ -243,7 +243,7 @@ void *r600_create_vertex_elements(struct pipe_context *ctx,

 	v->count = count;
 	v->vmgr_elements =
-		u_vbuf_mgr_create_vertex_elements(rctx->vbuf_mgr, count,
+		u_vbuf_create_vertex_elements(rctx->vbuf_mgr, count,
 						  elements, v->elements);

 	if (r600_vertex_elements_build_fetch_shader(rctx, v)) {
@@ -507,13 +507,13 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
 			/* one resource per vertex elements */
 			unsigned vbuffer_index;
 			vbuffer_index = rctx->vertex_elements->elements[i].vertex_buffer_index;
-			vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[vbuffer_index];
-			rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index];
+			vertex_buffer = &rctx->vbuf_mgr->real_vertex_buffer[vbuffer_index];
+			rbuffer = (struct r600_resource*)vertex_buffer->buffer;
 			offset = rctx->vertex_elements->vbuffer_offset[i];
 		} else {
 			/* bind vertex buffer once */
-			vertex_buffer = &rctx->vbuf_mgr->vertex_buffer[i];
-			rbuffer = (struct r600_resource*)rctx->vbuf_mgr->real_vertex_buffer[i];
+			vertex_buffer = &rctx->vbuf_mgr->real_vertex_buffer[i];
+			rbuffer = (struct r600_resource*)vertex_buffer->buffer;
 			offset = 0;
 		}
 		if (vertex_buffer == NULL || rbuffer == NULL)
@@ -565,10 +565,15 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		if (rctx->have_depth_fb || rctx->have_depth_texture)
 			r600_flush_depth_textures(rctx);
 	}
-	u_vbuf_mgr_draw_begin(rctx->vbuf_mgr, info);
+	u_vbuf_draw_begin(rctx->vbuf_mgr, info);
 	r600_vertex_buffer_update(rctx);

 	draw.info = *info;
+	if (draw.info.max_index != ~0) {
+		draw.info.min_index += info->index_bias;
+		draw.info.max_index += info->index_bias;
+	}
+
 	draw.ctx = ctx;
 	draw.index_buffer = NULL;
 	if (info->indexed && rctx->index_buffer.buffer) {
@@ -669,7 +674,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)

 	pipe_resource_reference(&draw.index_buffer, NULL);

-	u_vbuf_mgr_draw_end(rctx->vbuf_mgr);
+	u_vbuf_draw_end(rctx->vbuf_mgr);
 }

 void _r600_pipe_state_add_reg(struct r600_context *ctx,
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -319,6 +319,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		return V_0280A0_SWAP_STD;

 	case PIPE_FORMAT_R16_UNORM:
+	case PIPE_FORMAT_R16_FLOAT:
 		return V_0280A0_SWAP_STD;

 	/* 32-bit buffers. */
@@ -423,6 +424,9 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_R16_UNORM:
 		return V_0280A0_COLOR_16;

+	case PIPE_FORMAT_R16_FLOAT:
+		return V_0280A0_COLOR_16_FLOAT;
+
 	/* 32-bit buffers. */
 	case PIPE_FORMAT_A8B8G8R8_SRGB:
 	case PIPE_FORMAT_A8B8G8R8_UNORM:
--- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -205,10 +205,9 @@ convert_quad_stencil( struct depth_data *data,
   case PIPE_FORMAT_Z24_UNORM_S8_USCALED:
   case PIPE_FORMAT_X8Z24_UNORM:
   case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
-      {
-         for (j = 0; j < QUAD_SIZE; j++) {
-            data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
-         }
+   case PIPE_FORMAT_S8_USCALED:
+      for (j = 0; j < QUAD_SIZE; j++) {
+         data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
      }
      break;
   default:
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -99,9 +99,9 @@
 #endif
 #endif

-#if defined(__PPC__)
+#if defined(__ppc__) || defined(__ppc64__) || defined(__PPC__)
 #define PIPE_ARCH_PPC
-#if defined(__PPC64__)
+#if defined(__ppc64__) || defined(__PPC64__)
 #define PIPE_ARCH_PPC_64
 #endif
 #endif
@@ -120,6 +120,15 @@
 # define PIPE_ARCH_BIG_ENDIAN
 #endif

+#elif defined(__APPLE__)
+#include <machine/endian.h>
+
+#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
+# define PIPE_ARCH_LITTLE_ENDIAN
+#elif __DARWIN_BYTE_ORDER == __DARWIN_BIG_ENDIAN
+# define PIPE_ARCH_BIG_ENDIAN
+#endif
+
 #else

 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
--- a/src/gallium/targets/egl-static/Makefile
+++ b/src/gallium/targets/egl-static/Makefile
@@ -141,10 +141,18 @@ egl_LIBS += \
 	$(TOP)/src/gallium/drivers/svga/libsvga.a
 endif

-# swrast
+# softpipe
+ifneq ($(findstring softpipe,$(GALLIUM_DRIVERS_DIRS)),)
 egl_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE
 egl_LIBS += $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
 egl_SYS += -lm
+endif
+
+# llvmpipe
+ifneq ($(findstring llvmpipe,$(GALLIUM_DRIVERS_DIRS)),)
+egl_CPPFLAGS += -DGALLIUM_LLVMPIPE
+egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
+endif

 # sort to remove duplicates
 egl_CPPFLAGS := $(sort $(egl_CPPFLAGS))
@@ -158,8 +166,6 @@ st_GL_SYS := -lm -lpthread $(DLOPEN_LIBS)

 # LLVM
 ifeq ($(MESA_LLVM),1)
-egl_CPPFLAGS += -DGALLIUM_LLVMPIPE
-egl_LIBS += $(TOP)/src/gallium/drivers/llvmpipe/libllvmpipe.a
 egl_SYS += $(LLVM_LIBS)
 LDFLAGS += $(LLVM_LDFLAGS)

--- a/src/gallium/winsys/r600/drm/evergreen_hw_context.c
+++ b/src/gallium/winsys/r600/drm/evergreen_hw_context.c
@@ -1019,6 +1019,8 @@ int evergreen_context_init(struct r600_context *ctx, struct radeon *radeon)

 	LIST_INITHEAD(&ctx->fenced_bo);

+	r600_get_backend_mask(ctx);
+
 	return 0;
 out_err:
 	r600_context_fini(ctx);
--- a/src/gallium/winsys/r600/drm/r600_drm.c
+++ b/src/gallium/winsys/r600/drm/r600_drm.c
@@ -50,6 +50,14 @@
 #define RADEON_INFO_NUM_BACKENDS 0xa
 #endif

+#ifndef RADEON_INFO_NUM_TILE_PIPES
+#define RADEON_INFO_NUM_TILE_PIPES 0xb
+#endif
+
+#ifndef RADEON_INFO_BACKEND_MAP
+#define RADEON_INFO_BACKEND_MAP 0xd
+#endif
+
 enum radeon_family r600_get_family(struct radeon *r600)
 {
 	return r600->family;
@@ -75,6 +83,16 @@ unsigned r600_get_num_backends(struct radeon *radeon)
 	return radeon->num_backends;
 }

+unsigned r600_get_num_tile_pipes(struct radeon *radeon)
+{
+	return radeon->num_tile_pipes;	/* filled in by radeon_get_num_tile_pipes(); only queried when minor_version >= 11 */
+}
+
+unsigned r600_get_backend_map(struct radeon *radeon)
+{
+	return radeon->backend_map;	/* filled in by radeon_get_backend_map(); check backend_map_valid before trusting it */
+}
+
 unsigned r600_get_minor_version(struct radeon *radeon)
 {
 	return radeon->minor_version;
@@ -241,6 +259,42 @@ static int radeon_get_num_backends(struct radeon *radeon)
 	return 0;
 }

+static int radeon_get_num_tile_pipes(struct radeon *radeon)
+{
+	struct drm_radeon_info info = {};
+	uint32_t num_tile_pipes = 0;
+	int r;
+
+	info.request = RADEON_INFO_NUM_TILE_PIPES;
+	info.value = (uintptr_t)&num_tile_pipes;	/* address of the local that is read back after the ioctl */
+	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+			sizeof(struct drm_radeon_info));
+	if (r)
+		return r;	/* non-zero ioctl result is passed up; radeon->num_tile_pipes is not written */
+
+	radeon->num_tile_pipes = num_tile_pipes;
+	return 0;	/* 0 = value stored in radeon->num_tile_pipes */
+}
+
+static int radeon_get_backend_map(struct radeon *radeon)
+{
+	struct drm_radeon_info info = {};
+	uint32_t backend_map = 0;
+	int r;
+
+	info.request = RADEON_INFO_BACKEND_MAP;
+	info.value = (uintptr_t)&backend_map;	/* address of the local that is read back after the ioctl */
+	r = drmCommandWriteRead(radeon->fd, DRM_RADEON_INFO, &info,
+			sizeof(struct drm_radeon_info));
+	if (r)
+		return r;	/* non-zero ioctl result is passed up; backend_map_valid is not set */
+
+	radeon->backend_map = backend_map;
+	radeon->backend_map_valid = TRUE;	/* set only on success; r600_get_backend_mask() tests this flag */
+
+	return 0;
+}
+

 static int radeon_init_fence(struct radeon *radeon)
 {
@@ -362,6 +416,11 @@ static struct radeon *radeon_new(int fd, unsigned device)
 	if (radeon->minor_version >= 9)
 		radeon_get_num_backends(radeon);

+	if (radeon->minor_version >= 11) {
+		radeon_get_num_tile_pipes(radeon);
+		radeon_get_backend_map(radeon);
+	}
+
 	radeon->bomgr = r600_bomgr_create(radeon, 1000000);
 	if (radeon->bomgr == NULL) {
 		return NULL;
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -40,6 +40,91 @@

 #define GROUP_FORCE_NEW_BLOCK	0

+/* Compute ctx->backend_mask: one bit per backend that is in use.
+ * Tries the kernel-provided backend map first, then a ZPASS_DONE probe,
+ * and finally assumes that the first num_backends backends are active. */
+void r600_get_backend_mask(struct r600_context *ctx)
+{
+	struct r600_bo * buffer;
+	u32 * results;
+	unsigned num_backends = r600_get_num_backends(ctx->radeon);
+	unsigned i, mask = 0;
+
+	/* if backend_map query is supported by the kernel */
+	if (ctx->radeon->backend_map_valid) {
+		unsigned num_tile_pipes = r600_get_num_tile_pipes(ctx->radeon);
+		unsigned backend_map = r600_get_backend_map(ctx->radeon);
+		unsigned item_width, item_mask;
+
+		if (ctx->radeon->chip_class >= EVERGREEN) {
+			item_width = 4;
+			item_mask = 0x7;
+		} else {
+			item_width = 2;
+			item_mask = 0x3;
+		}
+
+		while(num_tile_pipes--) {
+			i = backend_map & item_mask;
+			mask |= (1<<i);
+			backend_map >>= item_width;
+		}
+		if (mask != 0) {
+			ctx->backend_mask = mask;
+			return;
+		}
+	}
+
+	/* otherwise backup path for older kernels: create buffer for event data */
+	buffer = r600_bo(ctx->radeon, ctx->max_db*16, 1, 0,
+				PIPE_USAGE_STAGING);
+	if (!buffer)
+		goto err;
+
+	/* initialize buffer with zeroes */
+	results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_WRITE, NULL);
+	if (results) {
+		memset(results, 0, ctx->max_db * 4 * 4);
+		r600_bo_unmap(ctx->radeon, buffer);
+
+		/* emit EVENT_WRITE for ZPASS_DONE */
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+
+		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
+		ctx->pm4[ctx->pm4_cdwords++] = 0;
+		r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], buffer);
+
+		/* execute */
+		r600_context_flush(ctx);
+
+		/* analyze results */
+		results = r600_bo_map(ctx->radeon, buffer, PB_USAGE_CPU_READ, NULL);
+		if (results) {
+			for(i = 0; i < ctx->max_db; i++) {
+				/* at least highest bit will be set if backend is used */
+				if (results[i*4 + 1])
+					mask |= (1<<i);
+			}
+			r600_bo_unmap(ctx->radeon, buffer);
+		}
+	}
+
+	r600_bo_reference(ctx->radeon, &buffer, NULL);
+
+	if (mask != 0) {
+		ctx->backend_mask = mask;
+		return;
+	}
+
+err:
+	/* fallback to old method - set num_backends lower bits to 1.
+	 * Built as (1 << n) - 1: shifting a u32 by 32 (n == 0) is undefined. */
+	ctx->backend_mask = num_backends < 32 ? ((u32)1 << num_backends) - 1 : ~((u32)0);
+}
+
 static inline void r600_context_ps_partial_flush(struct r600_context *ctx)
 {
 	if (!(ctx->flags & R600_CONTEXT_DRAW_PENDING))
@@ -899,6 +984,8 @@ int r600_context_init(struct r600_context *ctx, struct radeon *radeon)

 	ctx->max_db = 4;

+	r600_get_backend_mask(ctx);
+
 	return 0;
 out_err:
 	r600_context_fini(ctx);
@@ -1731,7 +1818,6 @@ static boolean r600_query_result(struct r600_context *ctx, struct r600_query *qu
 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
 	unsigned required_space, new_results_end;
-	int num_backends = r600_get_num_backends(ctx->radeon);

 	/* query request needs 6/8 dwords for begin + 6/8 dwords for end */
 	if (query->type == PIPE_QUERY_TIME_ELAPSED)
@@ -1777,9 +1863,11 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 			memset(results, 0, query->result_size);

 			/* Set top bits for unused backends */
-			for (i = num_backends; i < ctx->max_db; i++) {
-				results[(i * 4)+1] = 0x80000000;
-				results[(i * 4)+3] = 0x80000000;
+			for (i = 0; i < ctx->max_db; i++) {
+				if (!(ctx->backend_mask & (1<<i))) {
+					results[(i * 4)+1] = 0x80000000;
+					results[(i * 4)+3] = 0x80000000;
+				}
 			}
 			r600_bo_unmap(ctx->radeon, query->buffer);
 		}
--- a/src/gallium/winsys/r600/drm/r600_priv.h
+++ b/src/gallium/winsys/r600/drm/r600_priv.h
@@ -55,6 +55,9 @@ struct radeon {
 	struct r600_bo			*fence_bo;
 	unsigned			clock_crystal_freq;
 	unsigned			num_backends;
+	unsigned			num_tile_pipes;
+	unsigned			backend_map;
+	boolean				backend_map_valid;
 	unsigned                        minor_version;

        /* List of buffer handles and its mutex. */
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -115,6 +115,7 @@ static void radeon_cs_context_cleanup(struct radeon_cs_context *csc)
    }

    csc->crelocs = 0;
+    csc->validated_crelocs = 0;
    csc->chunks[0].length_dw = 0;
    csc->chunks[1].length_dw = 0;
    csc->used_gart = 0;
@@ -307,9 +308,37 @@ static void radeon_drm_cs_add_reloc(struct radeon_winsys_cs *rcs,
 static boolean radeon_drm_cs_validate(struct radeon_winsys_cs *rcs)
 {
    struct radeon_drm_cs *cs = radeon_drm_cs(rcs);
+    boolean status =
+        cs->csc->used_gart < cs->ws->gart_size * 0.8 &&
+        cs->csc->used_vram < cs->ws->vram_size * 0.8;

-    return cs->csc->used_gart < cs->ws->gart_size * 0.8 &&
-           cs->csc->used_vram < cs->ws->vram_size * 0.8;
+    if (status) {
+        cs->csc->validated_crelocs = cs->csc->crelocs;
+    } else {
+        /* Remove lately-added relocations. The validation failed with them
+         * and the CS is about to be flushed because of that. Keep only
+         * the already-validated relocations. */
+        unsigned i;
+
+        for (i = cs->csc->validated_crelocs; i < cs->csc->crelocs; i++) {
+            p_atomic_dec(&cs->csc->relocs_bo[i]->num_cs_references);
+            radeon_bo_reference(&cs->csc->relocs_bo[i], NULL);
+        }
+        cs->csc->crelocs = cs->csc->validated_crelocs;
+
+        /* Flush if there are any relocs. Clean up otherwise. */
+        if (cs->csc->crelocs) {
+            cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC);
+        } else {
+            radeon_cs_context_cleanup(cs->csc);
+
+            assert(cs->base.cdw == 0);
+            if (cs->base.cdw != 0) {
+                fprintf(stderr, "radeon: Unexpected error in %s.\n", __func__);
+            }
+        }
+    }
+    return status;
 }

 static void radeon_drm_cs_write_reloc(struct radeon_winsys_cs *rcs,
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -41,6 +41,7 @@ struct radeon_cs_context {
    /* Relocs. */
    unsigned                    nrelocs;
    unsigned                    crelocs;
+    unsigned			validated_crelocs;
    struct radeon_bo            **relocs_bo;
    struct drm_radeon_cs_reloc  *relocs;

--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -271,7 +271,9 @@ struct radeon_winsys {

    /**
     * Return TRUE if there is enough memory in VRAM and GTT for the relocs
-     * added so far.
+     * added so far. If the validation fails, all the relocations which have
+     * been added since the last call of cs_validate will be removed and
+     * the CS will be flushed (provided there are still any relocations).
     *
     * \param cs        A command stream to validate.
     */
--- a/src/glsl/ast.h
+++ b/src/glsl/ast.h
@@ -730,7 +730,6 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr,
 				 struct _mesa_glsl_parse_state *state);

 void
-emit_function(_mesa_glsl_parse_state *state, exec_list *instructions,
-	      ir_function *f);
+emit_function(_mesa_glsl_parse_state *state, ir_function *f);

 #endif /* AST_H */
--- a/src/glsl/ast_function.cpp
+++ b/src/glsl/ast_function.cpp
@@ -125,7 +125,7 @@ match_function_by_name(exec_list *instructions, const char *name,
 	    if (f == NULL) {
 	       f = new(ctx) ir_function(name);
 	       state->symbols->add_global_function(f);
-	       emit_function(state, instructions, f);
+	       emit_function(state, f);
 	    }

 	    f->add_signature(sig->clone_prototype(f, NULL));
@@ -134,6 +134,8 @@ match_function_by_name(exec_list *instructions, const char *name,
      }
   }

+   exec_list post_call_conversions;
+
   if (sig != NULL) {
      /* Verify that 'out' and 'inout' actual parameters are lvalues.  This
       * isn't done in ir_function::matching_signature because that function
@@ -141,6 +143,12 @@ match_function_by_name(exec_list *instructions, const char *name,
       *
       * Also, validate that 'const_in' formal parameters (an extension of our
       * IR) correspond to ir_constant actual parameters.
+       *
+       * Also, perform implicit conversion of arguments.  Note: to implicitly
+       * convert out parameters, we need to place them in a temporary
+       * variable, and do the conversion after the call takes place.  Since we
+       * haven't emitted the call yet, we'll place the post-call conversions
+       * in a temporary exec_list, and emit them later.
       */
      exec_list_iterator actual_iter = actual_parameters->iterator();
      exec_list_iterator formal_iter = sig->parameters.iterator();
@@ -185,8 +193,63 @@ match_function_by_name(exec_list *instructions, const char *name,
 	 }

 	 if (formal->type->is_numeric() || formal->type->is_boolean()) {
-	    ir_rvalue *converted = convert_component(actual, formal->type);
-	    actual->replace_with(converted);
+            switch (formal->mode) {
+            case ir_var_in: {
+               ir_rvalue *converted
+                  = convert_component(actual, formal->type);
+               actual->replace_with(converted);
+               break;
+            }
+            case ir_var_out:
+               if (actual->type != formal->type) {
+                  /* To convert an out parameter, we need to create a
+                   * temporary variable to hold the value before conversion,
+                   * and then perform the conversion after the function call
+                   * returns.
+                   *
+                   * This has the effect of transforming code like this:
+                   *
+                   *   void f(out int x);
+                   *   float value;
+                   *   f(value);
+                   *
+                   * Into IR that's equivalent to this:
+                   *
+                   *   void f(out int x);
+                   *   float value;
+                   *   int out_parameter_conversion;
+                   *   f(out_parameter_conversion);
+                   *   value = float(out_parameter_conversion);
+                   */
+                  ir_variable *tmp =
+                     new(ctx) ir_variable(formal->type,
+                                          "out_parameter_conversion",
+                                          ir_var_temporary);
+                  instructions->push_tail(tmp);
+                  ir_dereference_variable *deref_tmp_1
+                     = new(ctx) ir_dereference_variable(tmp);
+                  ir_dereference_variable *deref_tmp_2
+                     = new(ctx) ir_dereference_variable(tmp);
+                  ir_rvalue *converted_tmp
+                     = convert_component(deref_tmp_1, actual->type);
+                  ir_assignment *assignment
+                     = new(ctx) ir_assignment(actual, converted_tmp);
+                  post_call_conversions.push_tail(assignment);
+                  actual->replace_with(deref_tmp_2);
+               }
+               break;
+            case ir_var_inout:
+               /* Inout parameters should never require conversion, since that
+                * would require an implicit conversion to exist both to and
+                * from the formal parameter type, and there are no
+                * bidirectional implicit conversions.
+                */
+               assert (actual->type == formal->type);
+               break;
+            default:
+               assert (!"Illegal formal parameter mode");
+               break;
+            }
 	 }

 	 actual_iter.next();
@@ -196,11 +259,27 @@ match_function_by_name(exec_list *instructions, const char *name,
      /* Always insert the call in the instruction stream, and return a deref
       * of its return val if it returns a value, since we don't know if
       * the rvalue is going to be assigned to anything or not.
+       *
+       * Also insert any out parameter conversions after the call.
       */
      ir_call *call = new(ctx) ir_call(sig, actual_parameters);
+      ir_dereference_variable *deref;
      if (!sig->return_type->is_void()) {
+         /* If the function call is a constant expression, don't
+          * generate the instructions to call it; just generate an
+          * ir_constant representing the constant value.
+          *
+          * Function calls can only be constant expressions starting
+          * in GLSL 1.20.
+          */
+         if (state->language_version >= 120) {
+            ir_constant *const_val = call->constant_expression_value();
+            if (const_val) {
+               return const_val;
+            }
+         }
+
 	 ir_variable *var;
-	 ir_dereference_variable *deref;

 	 var = new(ctx) ir_variable(sig->return_type,
 				    ralloc_asprintf(ctx, "%s_retval",
@@ -211,15 +290,14 @@ match_function_by_name(exec_list *instructions, const char *name,
 	 deref = new(ctx) ir_dereference_variable(var);
 	 ir_assignment *assign = new(ctx) ir_assignment(deref, call, NULL);
 	 instructions->push_tail(assign);
-	 if (state->language_version >= 120)
-	    var->constant_value = call->constant_expression_value();

 	 deref = new(ctx) ir_dereference_variable(var);
-	 return deref;
      } else {
 	 instructions->push_tail(call);
-	 return NULL;
+	 deref = NULL;
      }
+      instructions->append_list(&post_call_conversions);
+      return deref;
   } else {
      char *str = prototype_string(NULL, name, actual_parameters);

@@ -419,13 +497,21 @@ process_array_constructor(exec_list *instructions,
      ir_rvalue *ir = (ir_rvalue *) n;
      ir_rvalue *result = ir;

-      /* Apply implicit conversions (not the scalar constructor rules!) */
+      /* Apply implicit conversions (not the scalar constructor rules!). See
+       * the spec quote above. */
      if (constructor_type->element_type()->is_float()) {
 	 const glsl_type *desired_type =
 	    glsl_type::get_instance(GLSL_TYPE_FLOAT,
 				    ir->type->vector_elements,
 				    ir->type->matrix_columns);
-	 result = convert_component(ir, desired_type);
+	 if (result->type->can_implicitly_convert_to(desired_type)) {
+	    /* Even though convert_component() implements the constructor
+	     * conversion rules (not the implicit conversion rules), it's safe
+	     * to use it here because we already checked that the implicit
+	     * conversion is legal.
+	     */
+	    result = convert_component(ir, desired_type);
+	 }
      }

      if (result->type != constructor_type->element_type()) {
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -66,6 +66,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)

   state->current_function = NULL;

+   state->toplevel_ir = instructions;
+
   /* Section 4.2 of the GLSL 1.20 specification states:
    * "The built-in functions are scoped in a scope outside the global scope
    *  users declare global variables in.  That is, a shader's global scope,
@@ -85,6 +87,8 @@ _mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
      ast->hir(instructions, state);

   detect_recursion_unlinked(state, instructions);
+
+   state->toplevel_ir = NULL;
 }


@@ -641,6 +645,16 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
   return NULL;
 }

+static void
+mark_whole_array_access(ir_rvalue *access)
+{  /* Set max_array_access of the variable dereferenced by \c access to length - 1. */
+   ir_dereference_variable *deref = access->as_dereference_variable();
+
+   if (deref && deref->var) {  /* no-op unless \c access is a variable dereference with a non-NULL var */
+      deref->var->max_array_access = deref->type->length - 1;  /* NOTE(review): assumes deref->type is an array type -- confirm at call sites */
+   }
+}
+
 ir_rvalue *
 do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 	      ir_rvalue *lhs, ir_rvalue *rhs, bool is_initializer,
@@ -657,16 +671,20 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
                          lhs->variable_referenced()->name);
         error_emitted = true;

+      } else if (state->language_version <= 110 && lhs->type->is_array()) {
+	 /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec:
+	  *
+	  *    "Other binary or unary expressions, non-dereferenced
+	  *     arrays, function names, swizzles with repeated fields,
+	  *     and constants cannot be l-values."
+	  */
+	 _mesa_glsl_error(&lhs_loc, state, "whole array assignment is not "
+			  "allowed in GLSL 1.10 or GLSL ES 1.00.");
+	 error_emitted = true;
      } else if (!lhs->is_lvalue()) {
 	 _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment");
 	 error_emitted = true;
      }
-
-      if (state->es_shader && lhs->type->is_array()) {
-	 _mesa_glsl_error(&lhs_loc, state, "whole array assignment is not "
-			  "allowed in GLSL ES 1.00.");
-	 error_emitted = true;
-      }
   }

   ir_rvalue *new_rhs =
@@ -701,6 +719,8 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
 						   rhs->type->array_size());
 	 d->type = var->type;
      }
+      mark_whole_array_access(rhs);
+      mark_whole_array_access(lhs);
   }

   /* Most callers of do_assignment (assign, add_assign, pre_inc/dec,
@@ -761,16 +781,6 @@ ast_node::hir(exec_list *instructions,
   return NULL;
 }

-static void
-mark_whole_array_access(ir_rvalue *access)
-{
-   ir_dereference_variable *deref = access->as_dereference_variable();
-
-   if (deref) {
-      deref->var->max_array_access = deref->type->length - 1;
-   }
-}
-
 static ir_rvalue *
 do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
 {
@@ -880,6 +890,29 @@ get_scalar_boolean_operand(exec_list *instructions,
   return new(ctx) ir_constant(true);
 }

+/**
+ * If \c name is a built-in array whose maximum allowed size is less than
+ * \c size, report an error at \c loc and return true; otherwise return false.
+ * Only gl_TexCoord (limit state->Const.MaxTextureCoords) is checked here. */
+static bool
+check_builtin_array_max_size(const char *name, unsigned size,
+                             YYLTYPE loc, struct _mesa_glsl_parse_state *state)
+{
+   if ((strcmp("gl_TexCoord", name) == 0)
+       && (size > state->Const.MaxTextureCoords)) {
+      /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec:
+       *
+       *     "The size [of gl_TexCoord] can be at most
+       *     gl_MaxTextureCoords."
+       */
+      _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot "
+                       "be larger than gl_MaxTextureCoords (%u)\n",
+                       state->Const.MaxTextureCoords);  /* NOTE(review): message ends in a newline, unlike the other error calls in this change -- confirm */
+      return true;  /* the error has already been reported above */
+   }
+   return false;  /* a different name, or a size within the limit */
+}
+
 ir_rvalue *
 ast_expression::hir(exec_list *instructions,
 		    struct _mesa_glsl_parse_state *state)
@@ -1537,8 +1570,15 @@ ast_expression::hir(exec_list *instructions,
 	     * FINISHME: array access limits be added to ir_dereference?
 	     */
 	    ir_variable *const v = array->whole_variable_referenced();
-	    if ((v != NULL) && (unsigned(idx) > v->max_array_access))
+	    if ((v != NULL) && (unsigned(idx) > v->max_array_access)) {
 	       v->max_array_access = idx;
+
+               /* Check whether this access will, as a side effect, implicitly
+                * cause the size of a built-in array to be too large.
+                */
+               if (check_builtin_array_max_size(v->name, idx+1, loc, state))
+                  error_emitted = true;
+            }
 	 }
      } else if (array->type->array_size() == 0) {
 	 _mesa_glsl_error(&loc, state, "unsized array index must be constant");
@@ -1757,11 +1797,6 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size,
      ir_rvalue *const ir = array_size->hir(& dummy_instructions, state);
      YYLTYPE loc = array_size->get_location();

-      /* FINISHME: Verify that the grammar forbids side-effects in array
-       * FINISHME: sizes.   i.e., 'vec4 [x = 12] data'
-       */
-      assert(dummy_instructions.is_empty());
-
      if (ir != NULL) {
 	 if (!ir->type->is_integer()) {
 	    _mesa_glsl_error(& loc, state, "array size must be integer type");
@@ -1778,6 +1813,14 @@ process_array_type(YYLTYPE *loc, const glsl_type *base, ast_node *array_size,
 	    } else {
 	       assert(size->type == ir->type);
 	       length = size->value.u[0];
+
+               /* If the array size is const (and we've verified that
+                * it is) then no instructions should have been emitted
+                * when we converted it to HIR.  If they were emitted,
+                * then either the array size isn't const after all, or
+                * we are emitting unnecessary instructions.
+                */
+               assert(dummy_instructions.is_empty());
 	    }
 	 }
      }
@@ -2054,10 +2097,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
       var->depth_layout = ir_depth_layout_unchanged;
   else
       var->depth_layout = ir_depth_layout_none;
-
-   if (var->type->is_array() && state->language_version != 110) {
-      var->array_lvalue = true;
-   }
 }

 /**
@@ -2105,18 +2144,9 @@ get_variable_being_redeclared(ir_variable *var, ast_declaration *decl,
       * FINISHME: required or not.
       */

-      /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec:
-       *
-       *     "The size [of gl_TexCoord] can be at most
-       *     gl_MaxTextureCoords."
-       */
      const unsigned size = unsigned(var->type->array_size());
-      if ((strcmp("gl_TexCoord", var->name) == 0)
-	  && (size > state->Const.MaxTextureCoords)) {
-	 _mesa_glsl_error(& loc, state, "`gl_TexCoord' array size cannot "
-			  "be larger than gl_MaxTextureCoords (%u)\n",
-			  state->Const.MaxTextureCoords);
-      } else if ((size > 0) && (size <= earlier->max_array_access)) {
+      check_builtin_array_max_size(var->name, size, loc, state);
+      if ((size > 0) && (size <= earlier->max_array_access)) {
 	 _mesa_glsl_error(& loc, state, "array size must be > %u due to "
 			  "previous access",
 			  earlier->max_array_access);
@@ -2391,12 +2421,12 @@ ast_declarator_list::hir(exec_list *instructions,

   decl_type = this->type->specifier->glsl_type(& type_name, state);
   if (this->declarations.is_empty()) {
-      /* The only valid case where the declaration list can be empty is when
-       * the declaration is setting the default precision of a built-in type
-       * (e.g., 'precision highp vec4;').
-       */
-
      if (decl_type != NULL) {
+	 /* Warn if this empty declaration is not for declaring a structure.
+	  */
+	 if (this->type->specifier->structure == NULL) {
+	    _mesa_glsl_warning(&loc, state, "empty declaration");
+	 }
      } else {
 	    _mesa_glsl_error(& loc, state, "incomplete declaration");
      }
@@ -2881,6 +2911,26 @@ ast_parameter_declarator::hir(exec_list *instructions,
      type = glsl_type::error_type;
   }

+   /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec:
+    *
+    *    "When calling a function, expressions that do not evaluate to
+    *     l-values cannot be passed to parameters declared as out or inout."
+    *
+    * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec:
+    *
+    *    "Other binary or unary expressions, non-dereferenced arrays,
+    *     function names, swizzles with repeated fields, and constants
+    *     cannot be l-values."
+    *
+    * So for GLSL 1.10, passing an array as an out or inout parameter is not
+    * allowed.  This restriction is removed in GLSL 1.20, and in GLSL ES.
+    */
+   if ((var->mode == ir_var_inout || var->mode == ir_var_out)
+       && type->is_array() && state->language_version == 110) {
+      _mesa_glsl_error(&loc, state, "Arrays cannot be out or inout parameters in GLSL 1.10");
+      type = glsl_type::error_type;
+   }
+
   instructions->push_tail(var);

   /* Parameter declarations do not have r-values.
@@ -2918,23 +2968,16 @@ ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters,


 void
-emit_function(_mesa_glsl_parse_state *state, exec_list *instructions,
-	      ir_function *f)
+emit_function(_mesa_glsl_parse_state *state, ir_function *f)
 {
-   /* Emit the new function header */
-   if (state->current_function == NULL) {
-      instructions->push_tail(f);
-   } else {
-      /* IR invariants disallow function declarations or definitions nested
-       * within other function definitions.  Insert the new ir_function
-       * block in the instruction sequence before the ir_function block
-       * containing the current ir_function_signature.
-       */
-      ir_function *const curr =
-	 const_cast<ir_function *>(state->current_function->function());
-
-      curr->insert_before(f);
-   }
+   /* IR invariants disallow function declarations or definitions
+    * nested within other function definitions, but impose no relative
+    * order among them, so simply append the new ir_function block to
+    * the end of the toplevel instruction list.  state->toplevel_ir is
+    * assigned in _mesa_ast_to_hir() and reset to NULL before that
+    * function returns, so it is only dereferenceable during that pass.
+    */
+   state->toplevel_ir->push_tail(f);
 }


@@ -3061,7 +3104,7 @@ ast_function::hir(exec_list *instructions,
 	 return NULL;
      }

-      emit_function(state, instructions, f);
+      emit_function(state, f);
   }

   /* Verify the return type of main() */
--- a/src/glsl/builtins/ir/asin
+++ b/src/glsl/builtins/ir/asin
@@ -5,23 +5,26 @@
     ((return (expression float *
 	       (expression float sign (var_ref x))
 	       (expression float -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression float *
 		 (expression float sqrt
 		  (expression float -
 		   (constant float (1.0))
 		   (expression float abs (var_ref x))))
 		 (expression float +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression float *
 		   (expression float abs (var_ref x))
 		   (expression float +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression float *
-		     (constant float (0.0742610))
-		     (expression float abs (var_ref x))))))))))))
+		     (expression float abs (var_ref x))
+                     (expression float +
+                      (constant float (0.086566724))
+                      (expression float *
+                       (expression float abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))

   (signature vec2
     (parameters
@@ -29,23 +32,26 @@
     ((return (expression vec2 *
 	       (expression vec2 sign (var_ref x))
 	       (expression vec2 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec2 *
 		 (expression vec2 sqrt
 		  (expression vec2 -
 		   (constant float (1.0))
 		   (expression vec2 abs (var_ref x))))
 		 (expression vec2 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec2 *
 		   (expression vec2 abs (var_ref x))
 		   (expression vec2 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec2 *
-		     (constant float (0.0742610))
-		     (expression vec2 abs (var_ref x))))))))))))
+		     (expression vec2 abs (var_ref x))
+                     (expression vec2 +
+                      (constant float (0.086566724))
+                      (expression vec2 *
+                       (expression vec2 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))

   (signature vec3
     (parameters
@@ -53,23 +59,26 @@
     ((return (expression vec3 *
 	       (expression vec3 sign (var_ref x))
 	       (expression vec3 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec3 *
 		 (expression vec3 sqrt
 		  (expression vec3 -
 		   (constant float (1.0))
 		   (expression vec3 abs (var_ref x))))
 		 (expression vec3 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec3 *
 		   (expression vec3 abs (var_ref x))
 		   (expression vec3 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec3 *
-		     (constant float (0.0742610))
-		     (expression vec3 abs (var_ref x))))))))))))
+		     (expression vec3 abs (var_ref x))
+                     (expression vec3 +
+                      (constant float (0.086566724))
+                      (expression vec3 *
+                       (expression vec3 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))

   (signature vec4
     (parameters
@@ -77,21 +86,24 @@
     ((return (expression vec4 *
 	       (expression vec4 sign (var_ref x))
 	       (expression vec4 -
-		(expression float *
-		 (constant float (3.1415926))
-		 (constant float (0.5)))
+		(constant float (1.5707964))
 		(expression vec4 *
 		 (expression vec4 sqrt
 		  (expression vec4 -
 		   (constant float (1.0))
 		   (expression vec4 abs (var_ref x))))
 		 (expression vec4 +
-		  (constant float (1.5707288))
+		  (constant float (1.5707964))
 		  (expression vec4 *
 		   (expression vec4 abs (var_ref x))
 		   (expression vec4 +
-		    (constant float (-0.2121144))
+		    (constant float (-0.21460183))
 		    (expression vec4 *
-		     (constant float (0.0742610))
-		     (expression vec4 abs (var_ref x))))))))))))
+		     (expression vec4 abs (var_ref x))
+                     (expression vec4 +
+                      (constant float (0.086566724))
+                      (expression vec4 *
+                       (expression vec4 abs (var_ref x))
+                       (constant float (-0.03102955))
+		     ))))))))))))
 ))
--- a/src/glsl/builtins/ir/atan
+++ b/src/glsl/builtins/ir/atan
@@ -54,7 +54,9 @@
    )
    (
      (declare () float r)
-      (if (expression bool > (expression float abs (var_ref x)) (constant float (0.000100))) (
+      (if (expression bool >
+           (expression float abs (var_ref x))
+           (expression float * (constant float (1.0e-8)) (expression float abs (var_ref y)))) (
        (assign (x) (var_ref r) (call atan ((expression float / (var_ref y) (var_ref x)))))
        (if (expression bool < (var_ref x) (constant float (0.000000)) ) (
          (if (expression bool >= (var_ref y) (constant float (0.000000)) )
--- a/src/glsl/builtins/ir/radians
+++ b/src/glsl/builtins/ir/radians
@@ -2,20 +2,20 @@
   (signature float
     (parameters
       (declare (in) float arg0))
-     ((return (expression float * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression float * (var_ref arg0) (constant float (0.0174532925))))))

   (signature vec2
     (parameters
       (declare (in) vec2 arg0))
-     ((return (expression vec2 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec2 * (var_ref arg0) (constant float (0.0174532925))))))

   (signature vec3
     (parameters
       (declare (in) vec3 arg0))
-     ((return (expression vec3 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec3 * (var_ref arg0) (constant float (0.0174532925))))))

   (signature vec4
     (parameters
       (declare (in) vec4 arg0))
-     ((return (expression vec4 * (var_ref arg0) (constant float (0.017453))))))
+     ((return (expression vec4 * (var_ref arg0) (constant float (0.0174532925))))))
 ))
--- a/src/glsl/glcpp/glcpp-parse.y
+++ b/src/glsl/glcpp/glcpp-parse.y
@@ -1273,6 +1273,48 @@ _glcpp_parser_expand_if (glcpp_parser_t *parser, int type, token_list_t *list)
 	glcpp_parser_lex_from (parser, expanded);
 }

+static void
+_glcpp_parser_apply_pastes (glcpp_parser_t *parser, token_list_t *list)
+{	/* Replace each "lhs ## rhs" sequence in list with the result of _token_paste(), editing list in place. */
+	token_node_t *node;
+
+	node = list->head;	/* NOTE(review): a PASTE token at the very head is never examined as a paste here -- confirm it is rejected earlier */
+	while (node)
+	{
+		token_node_t *next_non_space;
+
+		/* Look ahead for a PASTE token, skipping space. */
+		next_non_space = node->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL)
+			break;	/* only space (or nothing) follows node: no pastes remain */
+
+		if (next_non_space->token->type != PASTE) {
+			node = next_non_space;	/* not a paste; resume the scan from that token */
+			continue;
+		}
+
+		/* Now find the next non-space token after the PASTE. */
+		next_non_space = next_non_space->next;
+		while (next_non_space && next_non_space->token->type == SPACE)
+			next_non_space = next_non_space->next;
+
+		if (next_non_space == NULL) {
+			yyerror (&node->token->location, parser, "'##' cannot appear at either end of a macro expansion\n");
+			return;	/* bails out without updating list->non_space_tail below */
+		}
+
+		node->token = _token_paste (parser, node->token, next_non_space->token);
+		node->next = next_non_space->next;	/* unlink the PASTE, its right operand and any space between them */
+		if (next_non_space == list->tail)
+			list->tail = node;
+	}	/* node is not advanced after a paste, so the pasted token is re-examined and chains like a ## b ## c are handled */
+
+	list->non_space_tail = list->tail;	/* NOTE(review): assumes list does not end in SPACE; only the function-macro caller is seen trimming it -- confirm */
+}
+
 /* This is a helper function that's essentially part of the
 * implementation of _glcpp_parser_expand_node. It shouldn't be called
 * except for by that function.
@@ -1384,43 +1426,7 @@ _glcpp_parser_expand_function (glcpp_parser_t *parser,

 	_token_list_trim_trailing_space (substituted);

-	node = substituted->head;
-	while (node)
-	{
-		token_node_t *next_non_space;
-
-		/* Look ahead for a PASTE token, skipping space. */
-		next_non_space = node->next;
-		while (next_non_space && next_non_space->token->type == SPACE)
-			next_non_space = next_non_space->next;
-
-		if (next_non_space == NULL)
-			break;
-
-		if (next_non_space->token->type != PASTE) {
-			node = next_non_space;
-			continue;
-		}
-
-		/* Now find the next non-space token after the PASTE. */
-		next_non_space = next_non_space->next;
-		while (next_non_space && next_non_space->token->type == SPACE)
-			next_non_space = next_non_space->next;
-
-		if (next_non_space == NULL) {
-			yyerror (&node->token->location, parser, "'##' cannot appear at either end of a macro expansion\n");
-			return NULL;
-		}
-
-		node->token = _token_paste (parser, node->token, next_non_space->token);
-		node->next = next_non_space->next;
-		if (next_non_space == substituted->tail)
-			substituted->tail = node;
-
-		node = node->next;
-	}
-
-	substituted->non_space_tail = substituted->tail;
+	_glcpp_parser_apply_pastes (parser, substituted);

 	return substituted;
 }
@@ -1490,13 +1496,16 @@ _glcpp_parser_expand_node (glcpp_parser_t *parser,

 	if (! macro->is_function)
 	{
+		token_list_t *replacement;
 		*last = node;

 		/* Replace a macro defined as empty with a SPACE token. */
 		if (macro->replacements == NULL)
 			return _token_list_create_with_one_space (parser);

-		return _token_list_copy (parser, macro->replacements);
+		replacement = _token_list_copy (parser, macro->replacements);
+		_glcpp_parser_apply_pastes (parser, replacement);
+		return replacement;
 	}

 	return _glcpp_parser_expand_function (parser, node, last);
@@ -1652,8 +1661,8 @@ _check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
 	/* According to the GLSL specification, macro names starting with "__"
 	 * or "GL_" are reserved for future use.  So, don't allow them.
 	 */
-	if (strncmp(identifier, "__", 2) == 0) {
-		glcpp_error (loc, parser, "Macro names starting with \"__\" are reserved.\n");
+	if (strstr(identifier, "__")) {
+		glcpp_error (loc, parser, "Macro names containing \"__\" are reserved.\n");
 	}
 	if (strncmp(identifier, "GL_", 3) == 0) {
 		glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
--- a/src/glsl/glcpp/tests/086-reserved-macro-names.c
+++ b/src/glsl/glcpp/tests/086-reserved-macro-names.c
@@ -1,2 +1,3 @@
 #define __BAD reserved
 #define GL_ALSO_BAD() also reserved
+#define THIS__TOO__IS__BAD reserved
--- a/src/glsl/glcpp/tests/086-reserved-macro-names.c.expected
+++ b/src/glsl/glcpp/tests/086-reserved-macro-names.c.expected
@@ -1,7 +1,10 @@
-0:1(10): preprocessor error: Macro names starting with "__" are reserved.
+0:1(10): preprocessor error: Macro names containing "__" are reserved.

 0:2(9): preprocessor error: Macro names starting with "GL_" are reserved.

+0:3(9): preprocessor error: Macro names containing "__" are reserved.
+
+



--- a/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c
+++ b/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c
@@ -0,0 +1,3 @@
+#define PASTE_MACRO one ## token
+PASTE_MACRO
+
--- a/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected
+++ b/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected
@@ -0,0 +1,4 @@
+
+onetoken
+
+
--- a/src/glsl/glcpp/tests/098-elif-undefined.c
+++ b/src/glsl/glcpp/tests/098-elif-undefined.c
@@ -0,0 +1,3 @@
+#if 0
+#elif UNDEFINED_MACRO
+#endif
--- a/src/glsl/glcpp/tests/098-elif-undefined.c.expected
+++ b/src/glsl/glcpp/tests/098-elif-undefined.c.expected
@@ -0,0 +1,5 @@
+0:2(22): preprocessor error: syntax error, unexpected IDENTIFIER
+0:1(7): preprocessor error: Unterminated #if
+
+
+
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -971,13 +971,9 @@ single_declaration:
 	fully_specified_type
 	{
 	   void *ctx = state;
-	   if ($1->specifier->type_specifier != ast_struct) {
-	      _mesa_glsl_error(& @1, state, "empty declaration list\n");
-	      YYERROR;
-	   } else {
-	      $$ = new(ctx) ast_declarator_list($1);
-	      $$->set_location(yylloc);
-	   }
+	   /* Empty declaration list is valid. */
+	   $$ = new(ctx) ast_declarator_list($1);
+	   $$->set_location(yylloc);
 	}
 	| fully_specified_type any_identifier
 	{
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -129,6 +129,12 @@ struct _mesa_glsl_parse_state {
    */
   class ir_function_signature *current_function;

+   /**
+    * During AST to IR conversion, pointer to the toplevel IR
+    * instruction list being generated.
+    */
+   exec_list *toplevel_ir;
+
   /** Have we found a return statement in this function? */
   bool found_return;

--- a/src/glsl/glsl_types.cpp
+++ b/src/glsl/glsl_types.cpp
@@ -523,3 +523,19 @@ glsl_type::component_slots() const
      return 0;
   }
 }
+
+bool
+glsl_type::can_implicitly_convert_to(const glsl_type *desired) const
+{  /* Returns true for identical types and for the integer-to-float cases below; false otherwise. */
+   if (this == desired)
+      return true;  /* same type object: trivially convertible */
+
+   /* There is no conversion among matrix types (either side having more than one column). */
+   if (this->matrix_columns > 1 || desired->matrix_columns > 1)
+      return false;
+
+   /* int and uint can be converted to float, provided the component counts match. */
+   return desired->is_float()
+          && this->is_integer()
+          && this->vector_elements == desired->vector_elements;
+}
--- a/src/glsl/glsl_types.h
+++ b/src/glsl/glsl_types.h
@@ -224,6 +224,41 @@ struct glsl_type {
    */
   unsigned component_slots() const;

+   /**
+    * \brief Can this type be implicitly converted to another?
+    *
+    * \return True if the types are identical or if this type can be converted
+    *         to \c desired according to Section 4.1.10 of the GLSL spec.
+    *
+    * \verbatim
+    * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10
+    * Implicit Conversions:
+    *
+    *     In some situations, an expression and its type will be implicitly
+    *     converted to a different type. The following table shows all allowed
+    *     implicit conversions:
+    *
+    *     Type of expression | Can be implicitly converted to
+    *     --------------------------------------------------
+    *     int                  float
+    *     uint
+    *
+    *     ivec2                vec2
+    *     uvec2
+    *
+    *     ivec3                vec3
+    *     uvec3
+    *
+    *     ivec4                vec4
+    *     uvec4
+    *
+    *     There are no implicit array or structure conversions. For example,
+    *     an array of int cannot be implicitly converted to an array of float.
+    *     There are no implicit conversions between signed and unsigned
+    *     integers.
+    * \endverbatim
+    */
+   bool can_implicitly_convert_to(const glsl_type *desired) const;

   /**
    * Query whether or not a type is a scalar (non-vector and non-matrix).
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -1097,9 +1097,6 @@ ir_dereference::is_lvalue()
   if ((var == NULL) || var->read_only)
      return false;

-   if (this->type->is_array() && !var->array_lvalue)
-      return false;
-
   /* From page 17 (page 23 of the PDF) of the GLSL 1.20 spec:
    *
    *    "Samplers cannot be treated as l-values; hence cannot be used
@@ -1311,7 +1308,7 @@ ir_swizzle::variable_referenced()
 ir_variable::ir_variable(const struct glsl_type *type, const char *name,
 			 ir_variable_mode mode)
   : max_array_access(0), read_only(false), centroid(false), invariant(false),
-     mode(mode), interpolation(ir_var_smooth), array_lvalue(false)
+     mode(mode), interpolation(ir_var_smooth)
 {
   this->ir_type = ir_type_variable;
   this->type = type;
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -345,14 +345,6 @@ public:
    */
   unsigned interpolation:2;

-   /**
-    * Flag that the whole array is assignable
-    *
-    * In GLSL 1.20 and later whole arrays are assignable (and comparable for
-    * equality).  This flag enables this behavior.
-    */
-   unsigned array_lvalue:1;
-
   /**
    * \name ARB_fragment_coord_conventions
    * @{
@@ -682,7 +674,7 @@ public:

 class ir_assignment : public ir_instruction {
 public:
-   ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition);
+   ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL);

   /**
    * Construct an assignment with an explicit write mask
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -47,7 +47,6 @@ ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
   var->centroid = this->centroid;
   var->invariant = this->invariant;
   var->interpolation = this->interpolation;
-   var->array_lvalue = this->array_lvalue;
   var->location = this->location;
   var->warn_extension = this->warn_extension;
   var->origin_upper_left = this->origin_upper_left;
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -24,73 +24,25 @@
 #include "glsl_types.h"
 #include "ir.h"

-int
-type_compare(const glsl_type *a, const glsl_type *b)
-{
-   /* If the types are the same, they trivially match.
-    */
-   if (a == b)
-      return 0;
-
-   switch (a->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_BOOL:
-      /* There is no implicit conversion to or from integer types or bool.
-       */
-      if ((a->is_integer() != b->is_integer())
-	  || (a->is_boolean() != b->is_boolean()))
-	 return -1;
-
-      /* FALLTHROUGH */
-
-   case GLSL_TYPE_FLOAT:
-      if ((a->vector_elements != b->vector_elements)
-	  || (a->matrix_columns != b->matrix_columns))
-	 return -1;
-
-      return 1;
-
-   case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_STRUCT:
-      /* Samplers and structures must match exactly.
-       */
-      return -1;
-
-   case GLSL_TYPE_ARRAY:
-      if ((b->base_type != GLSL_TYPE_ARRAY)
-	  || (a->length != b->length))
-	 return -1;
-
-      /* From GLSL 1.50 spec, page 27 (page 33 of the PDF):
-       *    "There are no implicit array or structure conversions."
-       *
-       * If the comparison of the array element types detects that a conversion
-       * would be required, the array types do not match.
-       */
-      return (type_compare(a->fields.array, b->fields.array) == 0) ? 0 : -1;
-
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   default:
-      /* These are all error conditions.  It is invalid for a parameter to
-       * a function to be declared as error, void, or a function.
-       */
-      return -1;
-   }
-
-   /* This point should be unreachable.
-    */
-   assert(0);
-}
-
-
+/**
+ * \brief Check if two parameter lists match.
+ *
+ * \param list_a Parameters of the function definition.
+ * \param list_b Actual parameters passed to the function.
+ * \return If an exact match, return 0.
+ *         If an inexact match requiring implicit conversion, return 1.
+ *         If not a match, return -1.
+ * \see matching_signature()
+ */
 static int
 parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 {
   const exec_node *node_a = list_a->head;
   const exec_node *node_b = list_b->head;
-   int total_score = 0;
+
+   /* This is set to true if there is an inexact match requiring an implicit
+    * conversion. */
+   bool inexact_match = false;

   for (/* empty */
 	; !node_a->is_tail_sentinel()
@@ -106,12 +58,11 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
      const ir_variable *const param = (ir_variable *) node_a;
      const ir_instruction *const actual = (ir_instruction *) node_b;

-      /* Determine whether or not the types match.  If the types are an
-       * exact match, the match score is zero.  If the types don't match
-       * but the actual parameter can be coerced to the type of the declared
-       * parameter, the match score is one.
-       */
-      int score;
+      if (param->type == actual->type)
+	 continue;
+
+      /* Try to find an implicit conversion from actual to param. */
+      inexact_match = true;
      switch ((enum ir_variable_mode)(param->mode)) {
      case ir_var_auto:
      case ir_var_uniform:
@@ -125,11 +76,13 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)

      case ir_var_const_in:
      case ir_var_in:
-	 score = type_compare(param->type, actual->type);
+	 if (!actual->type->can_implicitly_convert_to(param->type))
+	    return -1;
 	 break;

      case ir_var_out:
-	 score = type_compare(actual->type, param->type);
+	 if (!param->type->can_implicitly_convert_to(actual->type))
+	    return -1;
 	 break;

      case ir_var_inout:
@@ -137,17 +90,12 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
 	  * there is int -> float but no float -> int), inout parameters must
 	  * be exact matches.
 	  */
-	 score = (type_compare(actual->type, param->type) == 0) ? 0 : -1;
-	 break;
+	 return -1;

      default:
 	 assert(false);
-      }
-
-      if (score < 0)
 	 return -1;
-
-      total_score += score;
+      }
   }

   /* If all of the parameters from the other parameter list have been
@@ -157,7 +105,10 @@ parameter_lists_match(const exec_list *list_a, const exec_list *list_b)
   if (!node_b->is_tail_sentinel())
      return -1;

-   return total_score;
+   if (inexact_match)
+      return 1;
+   else
+      return 0;
 }


--- a/src/glsl/ir_function_detect_recursion.cpp
+++ b/src/glsl/ir_function_detect_recursion.cpp
@@ -125,6 +125,7 @@
 #include "glsl_parser_extras.h"
 #include "linker.h"
 #include "program/hash_table.h"
+#include "program.h"

 struct call_node : public exec_node {
   class function *func;
@@ -311,9 +312,7 @@ emit_errors_linked(const void *key, void *data, void *closure)
 				  f->sig->function_name(),
 				  &f->sig->parameters);

-   linker_error_printf(prog,
-		       "function `%s' has static recursion.\n",
-		       proto);
+   linker_error(prog, "function `%s' has static recursion.\n", proto);
   ralloc_free(proto);
   prog->LinkStatus = false;
 }
--- a/src/glsl/ir_hierarchical_visitor.h
+++ b/src/glsl/ir_hierarchical_visitor.h
@@ -178,6 +178,7 @@ void visit_tree(ir_instruction *ir,
 		void (*callback)(class ir_instruction *ir, void *data),
 		void *data);

-ir_visitor_status visit_list_elements(ir_hierarchical_visitor *v, exec_list *l);
+ir_visitor_status visit_list_elements(ir_hierarchical_visitor *v, exec_list *l,
+                                      bool statement_list = true);

 #endif /* IR_HIERARCHICAL_VISITOR_H */
--- a/src/glsl/ir_hv_accept.cpp
+++ b/src/glsl/ir_hv_accept.cpp
@@ -30,7 +30,13 @@
 */

 /**
- * Process a list of nodes using a hierarchical vistor
+ * Process a list of nodes using a hierarchical visitor.
+ *
+ * If statement_list is true (the default), this is a list of statements, so
+ * v->base_ir will be set to point to each statement just before iterating
+ * over it, and restored after iteration is complete.  If statement_list is
+ * false, this is a list that appears inside a statement (e.g. a parameter
+ * list), so v->base_ir will be left alone.
 *
 * \warning
 * This function will operate correctly if a node being processed is removed
@@ -38,19 +44,22 @@
 * processed, some of the added nodes may not be processed.
 */
 ir_visitor_status
-visit_list_elements(ir_hierarchical_visitor *v, exec_list *l)
+visit_list_elements(ir_hierarchical_visitor *v, exec_list *l,
+                    bool statement_list)
 {
   ir_instruction *prev_base_ir = v->base_ir;

   foreach_list_safe(n, l) {
      ir_instruction *const ir = (ir_instruction *) n;
-      v->base_ir = ir;
+      if (statement_list)
+         v->base_ir = ir;
      ir_visitor_status s = ir->accept(v);

      if (s != visit_continue)
 	 return s;
   }
-   v->base_ir = prev_base_ir;
+   if (statement_list)
+      v->base_ir = prev_base_ir;

   return visit_continue;
 }
@@ -129,7 +138,7 @@ ir_function::accept(ir_hierarchical_visitor *v)
   if (s != visit_continue)
      return (s == visit_continue_with_parent) ? visit_continue : s;

-   s = visit_list_elements(v, &this->signatures);
+   s = visit_list_elements(v, &this->signatures, false);
   return (s == visit_stop) ? s : v->visit_leave(this);
 }

@@ -314,7 +323,7 @@ ir_call::accept(ir_hierarchical_visitor *v)
   if (s != visit_continue)
      return (s == visit_continue_with_parent) ? visit_continue : s;

-   s = visit_list_elements(v, &this->actual_parameters);
+   s = visit_list_elements(v, &this->actual_parameters, false);
   if (s == visit_stop)
      return s;

--- a/src/glsl/ir_variable.cpp
+++ b/src/glsl/ir_variable.cpp
@@ -224,11 +224,11 @@ MATRIX(gl_TextureMatrixInverseTranspose,

 static struct gl_builtin_uniform_element gl_NormalMatrix_elements[] = {
   { NULL, { STATE_MODELVIEW_MATRIX, 0, 0, 0, STATE_MATRIX_INVERSE},
-     SWIZZLE_XYZW },
+     MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) },
   { NULL, { STATE_MODELVIEW_MATRIX, 0, 1, 1, STATE_MATRIX_INVERSE},
-     SWIZZLE_XYZW },
+     MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) },
   { NULL, { STATE_MODELVIEW_MATRIX, 0, 2, 2, STATE_MATRIX_INVERSE},
-     SWIZZLE_XYZW },
+     MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) },
 };

 #undef MATRIX
--- a/src/glsl/link_functions.cpp
+++ b/src/glsl/link_functions.cpp
@@ -91,8 +91,8 @@ public:
      if (sig == NULL) {
 	 /* FINISHME: Log the full signature of unresolved function.
 	  */
-	 linker_error_printf(this->prog, "unresolved reference to function "
-			     "`%s'\n", name);
+	 linker_error(this->prog, "unresolved reference to function `%s'\n",
+		      name);
 	 this->success = false;
 	 return visit_stop;
      }
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -164,7 +164,7 @@ private:


 void
-linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
+linker_error(gl_shader_program *prog, const char *fmt, ...)
 {
   va_list ap;

@@ -172,6 +172,21 @@ linker_error_printf(gl_shader_program *prog, const char *fmt, ...)
   va_start(ap, fmt);
   ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
   va_end(ap);
+
+   prog->LinkStatus = false;
+}
+
+/* Append a printf-style message, prefixed "warning: ", to prog->InfoLog. */
+void
+linker_warning(gl_shader_program *prog, const char *fmt, ...)
+{
+   va_list ap;
+
+   ralloc_strcat(&prog->InfoLog, "warning: ");
+   va_start(ap, fmt);
+   ralloc_vasprintf_append(&prog->InfoLog, fmt, ap);
+   va_end(ap);
+   /* Unlike linker_error(), a warning leaves prog->LinkStatus untouched. */
 }


@@ -243,8 +258,7 @@ validate_vertex_shader_executable(struct gl_shader_program *prog,
   find_assignment_visitor find("gl_Position");
   find.run(shader->ir);
   if (!find.variable_found()) {
-      linker_error_printf(prog,
-			  "vertex shader does not write to `gl_Position'\n");
+      linker_error(prog, "vertex shader does not write to `gl_Position'\n");
      return false;
   }

@@ -271,8 +285,8 @@ validate_fragment_shader_executable(struct gl_shader_program *prog,
   frag_data.run(shader->ir);

   if (frag_color.variable_found() && frag_data.variable_found()) {
-      linker_error_printf(prog,  "fragment shader writes to both "
-			  "`gl_FragColor' and `gl_FragData'\n");
+      linker_error(prog,  "fragment shader writes to both "
+		   "`gl_FragColor' and `gl_FragData'\n");
      return false;
   }

@@ -357,11 +371,11 @@ cross_validate_globals(struct gl_shader_program *prog,
 		     existing->type = var->type;
 		  }
 	       } else {
-		  linker_error_printf(prog, "%s `%s' declared as type "
-				      "`%s' and type `%s'\n",
-				      mode_string(var),
-				      var->name, var->type->name,
-				      existing->type->name);
+		  linker_error(prog, "%s `%s' declared as type "
+			       "`%s' and type `%s'\n",
+			       mode_string(var),
+			       var->name, var->type->name,
+			       existing->type->name);
 		  return false;
 	       }
 	    }
@@ -369,9 +383,9 @@ cross_validate_globals(struct gl_shader_program *prog,
 	    if (var->explicit_location) {
 	       if (existing->explicit_location
 		   && (var->location != existing->location)) {
-		     linker_error_printf(prog, "explicit locations for %s "
-					 "`%s' have differing values\n",
-					 mode_string(var), var->name);
+		     linker_error(prog, "explicit locations for %s "
+				  "`%s' have differing values\n",
+				  mode_string(var), var->name);
 		     return false;
 	       }

@@ -392,12 +406,12 @@ cross_validate_globals(struct gl_shader_program *prog,
           bool layout_declared = var->depth_layout != ir_depth_layout_none;
           bool layout_differs = var->depth_layout != existing->depth_layout;
           if (layout_declared && layout_differs) {
-              linker_error_printf(prog,
+              linker_error(prog,
                 "All redeclarations of gl_FragDepth in all fragment shaders "
                 "in a single program must have the same set of qualifiers.");
           }
           if (var->used && layout_differs) {
-              linker_error_printf(prog,
+              linker_error(prog,
                    "If gl_FragDepth is redeclared with a layout qualifier in"
                    "any fragment shader, it must be redeclared with the same"
                    "layout qualifier in all fragment shaders that have"
@@ -410,9 +424,9 @@ cross_validate_globals(struct gl_shader_program *prog,
 	    if (var->constant_value != NULL) {
 	       if (existing->constant_value != NULL) {
 		  if (!var->constant_value->has_value(existing->constant_value)) {
-		     linker_error_printf(prog, "initializers for %s "
-					 "`%s' have differing values\n",
-					 mode_string(var), var->name);
+		     linker_error(prog, "initializers for %s "
+				  "`%s' have differing values\n",
+				  mode_string(var), var->name);
 		     return false;
 		  }
 	       } else
@@ -433,15 +447,15 @@ cross_validate_globals(struct gl_shader_program *prog,
 	    }

 	    if (existing->invariant != var->invariant) {
-	       linker_error_printf(prog, "declarations for %s `%s' have "
-	                           "mismatching invariant qualifiers\n",
-	                           mode_string(var), var->name);
+	       linker_error(prog, "declarations for %s `%s' have "
+			    "mismatching invariant qualifiers\n",
+			    mode_string(var), var->name);
 	       return false;
 	    }
            if (existing->centroid != var->centroid) {
-               linker_error_printf(prog, "declarations for %s `%s' have "
-                                   "mismatching centroid qualifiers\n",
-                                   mode_string(var), var->name);
+               linker_error(prog, "declarations for %s `%s' have "
+			    "mismatching centroid qualifiers\n",
+			    mode_string(var), var->name);
               return false;
            }
 	 } else
@@ -529,13 +543,12 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
 	     */
 	    if (!output->type->is_array()
 		|| (strncmp("gl_", output->name, 3) != 0)) {
-	       linker_error_printf(prog,
-				   "%s shader output `%s' declared as "
-				   "type `%s', but %s shader input declared "
-				   "as type `%s'\n",
-				   producer_stage, output->name,
-				   output->type->name,
-				   consumer_stage, input->type->name);
+	       linker_error(prog,
+			    "%s shader output `%s' declared as type `%s', "
+			    "but %s shader input declared as type `%s'\n",
+			    producer_stage, output->name,
+			    output->type->name,
+			    consumer_stage, input->type->name);
 	       return false;
 	    }
 	 }
@@ -543,40 +556,40 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
 	 /* Check that all of the qualifiers match between stages.
 	  */
 	 if (input->centroid != output->centroid) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' %s centroid qualifier, "
-				"but %s shader input %s centroid qualifier\n",
-				producer_stage,
-				output->name,
-				(output->centroid) ? "has" : "lacks",
-				consumer_stage,
-				(input->centroid) ? "has" : "lacks");
+	    linker_error(prog,
+			 "%s shader output `%s' %s centroid qualifier, "
+			 "but %s shader input %s centroid qualifier\n",
+			 producer_stage,
+			 output->name,
+			 (output->centroid) ? "has" : "lacks",
+			 consumer_stage,
+			 (input->centroid) ? "has" : "lacks");
 	    return false;
 	 }

 	 if (input->invariant != output->invariant) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' %s invariant qualifier, "
-				"but %s shader input %s invariant qualifier\n",
-				producer_stage,
-				output->name,
-				(output->invariant) ? "has" : "lacks",
-				consumer_stage,
-				(input->invariant) ? "has" : "lacks");
+	    linker_error(prog,
+			 "%s shader output `%s' %s invariant qualifier, "
+			 "but %s shader input %s invariant qualifier\n",
+			 producer_stage,
+			 output->name,
+			 (output->invariant) ? "has" : "lacks",
+			 consumer_stage,
+			 (input->invariant) ? "has" : "lacks");
 	    return false;
 	 }

 	 if (input->interpolation != output->interpolation) {
-	    linker_error_printf(prog,
-				"%s shader output `%s' specifies %s "
-				"interpolation qualifier, "
-				"but %s shader input specifies %s "
-				"interpolation qualifier\n",
-				producer_stage,
-				output->name,
-				output->interpolation_string(),
-				consumer_stage,
-				input->interpolation_string());
+	    linker_error(prog,
+			 "%s shader output `%s' specifies %s "
+			 "interpolation qualifier, "
+			 "but %s shader input specifies %s "
+			 "interpolation qualifier\n",
+			 producer_stage,
+			 output->name,
+			 output->interpolation_string(),
+			 consumer_stage,
+			 input->interpolation_string());
 	    return false;
 	 }
      }
@@ -823,9 +836,8 @@ link_intrastage_shaders(void *mem_ctx,

 	       if ((other_sig != NULL) && other_sig->is_defined
 		   && !other_sig->is_builtin) {
-		  linker_error_printf(prog,
-				      "function `%s' is multiply defined",
-				      f->name);
+		  linker_error(prog, "function `%s' is multiply defined",
+			       f->name);
 		  return NULL;
 	       }
 	    }
@@ -849,9 +861,9 @@ link_intrastage_shaders(void *mem_ctx,
   }

   if (main == NULL) {
-      linker_error_printf(prog, "%s shader lacks `main'\n",
-			  (shader_list[0]->Type == GL_VERTEX_SHADER)
-			  ? "vertex" : "fragment");
+      linker_error(prog, "%s shader lacks `main'\n",
+		   (shader_list[0]->Type == GL_VERTEX_SHADER)
+		   ? "vertex" : "fragment");
      return NULL;
   }

@@ -1309,10 +1321,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	  * attribute overlaps any previously allocated bits.
 	  */
 	 if ((~(use_mask << attr) & used_locations) != used_locations) {
-	    linker_error_printf(prog,
-				"insufficient contiguous attribute locations "
-				"available for vertex shader input `%s'",
-				var->name);
+	    linker_error(prog,
+			 "insufficient contiguous attribute locations "
+			 "available for vertex shader input `%s'",
+			 var->name);
 	    return false;
 	 }

@@ -1353,11 +1365,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog,

 	 if ((var->location >= (int)(max_index + generic_base))
 	     || (var->location < 0)) {
-	    linker_error_printf(prog,
-				"invalid explicit location %d specified for "
-				"`%s'\n",
-				(var->location < 0) ? var->location : attr,
-				var->name);
+	    linker_error(prog,
+			 "invalid explicit location %d specified for `%s'\n",
+			 (var->location < 0) ? var->location : attr,
+			 var->name);
 	    return false;
 	 } else if (var->location >= generic_base) {
 	    used_locations |= (use_mask << attr);
@@ -1406,10 +1417,10 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	 const char *const string = (target_index == MESA_SHADER_VERTEX)
 	    ? "vertex shader input" : "fragment shader output";

-	 linker_error_printf(prog,
-			     "insufficient contiguous attribute locations "
-			     "available for %s `%s'",
-			     string, to_assign[i].var->name);
+	 linker_error(prog,
+		      "insufficient contiguous attribute locations "
+		      "available for %s `%s'",
+		      string, to_assign[i].var->name);
 	 return false;
      }

@@ -1525,9 +1536,8 @@ assign_varying_locations(struct gl_context *ctx,
 	     * "glsl1-varying read but not written" in piglit.
 	     */

-	    linker_error_printf(prog, "fragment shader varying %s not written "
-				"by vertex shader\n.", var->name);
-	    prog->LinkStatus = false;
+	    linker_error(prog, "fragment shader varying %s not written "
+			 "by vertex shader.\n", var->name);
 	 }

 	 /* An 'in' variable is only really a shader input if its
@@ -1544,17 +1554,17 @@ assign_varying_locations(struct gl_context *ctx,

   if (ctx->API == API_OPENGLES2 || prog->Version == 100) {
      if (varying_vectors > ctx->Const.MaxVarying) {
-	 linker_error_printf(prog, "shader uses too many varying vectors "
-			     "(%u > %u)\n",
-			     varying_vectors, ctx->Const.MaxVarying);
+	 linker_error(prog, "shader uses too many varying vectors "
+		      "(%u > %u)\n",
+		      varying_vectors, ctx->Const.MaxVarying);
 	 return false;
      }
   } else {
      const unsigned float_components = varying_vectors * 4;
      if (float_components > ctx->Const.MaxVarying * 4) {
-	 linker_error_printf(prog, "shader uses too many varying components "
-			     "(%u > %u)\n",
-			     float_components, ctx->Const.MaxVarying * 4);
+	 linker_error(prog, "shader uses too many varying components "
+		      "(%u > %u)\n",
+		      float_components, ctx->Const.MaxVarying * 4);
 	 return false;
      }
   }
@@ -1618,8 +1628,8 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
   assert(max_version <= 130);
   if ((max_version >= 130 || min_version == 100)
       && min_version != max_version) {
-      linker_error_printf(prog, "all shaders must use same shading "
-			  "language version\n");
+      linker_error(prog, "all shaders must use same shading "
+		   "language version\n");
      goto done;
   }

@@ -1720,12 +1730,10 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    * FINISHME: at least 16, so hardcode 16 for now.
    */
   if (!assign_attribute_or_color_locations(prog, MESA_SHADER_VERTEX, 16)) {
-      prog->LinkStatus = false;
      goto done;
   }

   if (!assign_attribute_or_color_locations(prog, MESA_SHADER_FRAGMENT, ctx->Const.MaxDrawBuffers)) {
-      prog->LinkStatus = false;
      goto done;
   }

@@ -1742,7 +1750,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
      if (!assign_varying_locations(ctx, prog,
 				    prog->_LinkedShaders[prev],
 				    prog->_LinkedShaders[i])) {
-	 prog->LinkStatus = false;
 	 goto done;
      }

@@ -1774,11 +1781,9 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
    */
   if (ctx->API == API_OPENGLES2 || prog->Version == 100) {
      if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
-	 linker_error_printf(prog, "program lacks a vertex shader\n");
-	 prog->LinkStatus = false;
+	 linker_error(prog, "program lacks a vertex shader\n");
      } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
-	 linker_error_printf(prog, "program lacks a fragment shader\n");
-	 prog->LinkStatus = false;
+	 linker_error(prog, "program lacks a fragment shader\n");
      }
   }

--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -25,9 +25,6 @@
 #ifndef GLSL_LINKER_H
 #define GLSL_LINKER_H

-extern void
-linker_error_printf(gl_shader_program *prog, const char *fmt, ...);
-
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,
 		    gl_shader **shader_list, unsigned num_shaders);
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -166,6 +166,10 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
      else
 	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);

+      vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
+					 ir->type->vector_elements,
+					 ir->type->matrix_columns);
+
      op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);

      ir->operation = ir_unop_f2i;
--- a/src/glsl/lower_jumps.cpp
+++ b/src/glsl/lower_jumps.cpp
@@ -146,16 +146,17 @@ struct function_record
   ir_function_signature* signature;
   ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */
   ir_variable* return_value;
-   bool is_main;
+   bool lower_return;
   unsigned nesting_depth;

-   function_record(ir_function_signature* p_signature = 0)
+   function_record(ir_function_signature* p_signature = 0,
+                   bool lower_return = false)
   {
      this->signature = p_signature;
      this->return_flag = 0;
      this->return_value = 0;
      this->nesting_depth = 0;
-      this->is_main = this->signature && (strcmp(this->signature->function_name(), "main") == 0);
+      this->lower_return = lower_return;
   }

   ir_variable* get_return_flag()
@@ -218,6 +219,87 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
      }
   }

+   /**
+    * Insert the instructions necessary to lower a return statement,
+    * before the given return instruction.
+    */
+   void insert_lowered_return(ir_return *ir)
+   {
+      ir_variable* return_flag = this->function.get_return_flag();
+      if(!this->function.signature->return_type->is_void()) {
+         ir_variable* return_value = this->function.get_return_value();
+         ir->insert_before(
+            new(ir) ir_assignment(
+               new (ir) ir_dereference_variable(return_value),
+               ir->value));
+      }
+      ir->insert_before(
+         new(ir) ir_assignment(
+            new (ir) ir_dereference_variable(return_flag),
+            new (ir) ir_constant(true)));
+      this->loop.may_set_return_flag = true;
+   }
+
+   /**
+    * If the given instruction is a return, lower it to instructions
+    * that store the return value (if there is one), set the return
+    * flag, and then break.
+    *
+    * It is safe to pass NULL to this function.
+    */
+   void lower_return_unconditionally(ir_instruction *ir)
+   {
+      if (get_jump_strength(ir) != strength_return) {
+         return;
+      }
+      insert_lowered_return((ir_return*)ir);
+      ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
+   }
+
+   /**
+    * Create the necessary instruction to replace a break instruction.
+    */
+   ir_instruction *create_lowered_break()
+   {
+      void *ctx = this->function.signature;
+      return new(ctx) ir_assignment(
+          new(ctx) ir_dereference_variable(this->loop.get_break_flag()),
+          new(ctx) ir_constant(true),
+          0);
+   }
+
+   /**
+    * If the given instruction is a break, lower it to an instruction
+    * that sets the break flag, without consulting
+    * should_lower_jump().
+    *
+    * It is safe to pass NULL to this function.
+    */
+   void lower_break_unconditionally(ir_instruction *ir)
+   {
+      if (get_jump_strength(ir) != strength_break) {
+         return;
+      }
+      ir->replace_with(create_lowered_break());
+   }
+
+   /**
+    * If the block ends in a conditional or unconditional break, lower
+    * it, even though should_lower_jump() says it needn't be lowered.
+    */
+   void lower_final_breaks(exec_list *block)
+   {
+      ir_instruction *ir = (ir_instruction *) block->get_tail();
+      lower_break_unconditionally(ir);
+      ir_if *ir_if = ir->as_if();
+      if (ir_if) {
+          lower_break_unconditionally(
+              (ir_instruction *) ir_if->then_instructions.get_tail());
+          lower_break_unconditionally(
+              (ir_instruction *) ir_if->else_instructions.get_tail());
+      }
+   }
+
   virtual void visit(class ir_loop_jump * ir)
   {
      truncate_after_instruction(ir);
@@ -274,10 +356,8 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
         /* never lower return at the end of a this->function */
         if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
            lower = false;
-         else if (this->function.is_main)
-            lower = lower_main_return;
         else
-            lower = lower_sub_return;
+            lower = this->function.lower_return;
         break;
      }
      return lower;
@@ -285,9 +365,20 @@ struct ir_lower_jumps_visitor : public ir_control_flow_visitor {

   block_record visit_block(exec_list* list)
   {
+      /* Note: since visiting a node may change that node's next
+       * pointer, we can't use visit_exec_list(), because
+       * visit_exec_list() caches the node's next pointer before
+       * visiting it.  So we use foreach_list() instead.
+       *
+       * foreach_list() isn't safe if the node being visited gets
+       * removed, but fortunately this visitor doesn't do that.
+       */
+
      block_record saved_block = this->block;
      this->block = block_record();
-      visit_exec_list(list, this);
+      foreach_list(node, list) {
+         ((ir_instruction *) node)->accept(this);
+      }
      block_record ret = this->block;
      this->block = saved_block;
      return ret;
@@ -370,13 +461,12 @@ retry: /* we get here if we put code after the if inside a branch */
            break;

         if(jump_strengths[lower] == strength_return) {
-            ir_variable* return_flag = this->function.get_return_flag();
-            if(!this->function.signature->return_type->is_void()) {
-               ir_variable* return_value = this->function.get_return_value();
-               jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_value), ((ir_return*)jumps[lower])->value, NULL));
-            }
-            jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(return_flag), new (ir) ir_constant(true), NULL));
-            this->loop.may_set_return_flag = true;
+            /* To lower a return, we create a return flag (if the
+             * function doesn't have one already) and add instructions
+             * that: 1. store the return value (if this function has a
+             * non-void return) and 2. set the return flag
+             */
+            insert_lowered_return((ir_return*)jumps[lower]);
            if(this->loop.loop) {
               ir_loop_jump* lowered = 0;
               lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
@@ -395,7 +485,7 @@ retry: /* we get here if we put code after the if inside a branch */
             * Smarter options (such as undoing the increment) are possible but it's not worth implementing them,
             * because if break is lowered, continue is almost surely lowered too.
             */
-            jumps[lower]->insert_before(new(ir) ir_assignment(new (ir) ir_dereference_variable(this->loop.get_break_flag()), new (ir) ir_constant(true), 0));
+            jumps[lower]->insert_before(create_lowered_break());
            goto lower_continue;
         } else if(jump_strengths[lower] == strength_continue) {
 lower_continue:
@@ -405,7 +495,10 @@ lower_continue:
            block_records[lower].min_strength = strength_always_clears_execute_flag;
            block_records[lower].may_clear_execute_flag = true;
            this->progress = true;
-            break;
+
+            /* Let the loop run again, in case the other branch of the
+             * if needs to be lowered too.
+             */
         }
      }

@@ -497,13 +590,50 @@ lower_continue:
      loop_record saved_loop = this->loop;
      this->loop = loop_record(this->function.signature, ir);

+      /* Recursively lower nested jumps.  This satisfies the
+       * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
+       * an unconditional continue or return at the bottom of the
+       * loop, which are handled below.
+       */
      block_record body = visit_block(&ir->body_instructions);

+      /* If the loop ends in an unconditional continue, eliminate it
+       * because it is redundant.
+       */
+      ir_instruction *ir_last
+         = (ir_instruction *) ir->body_instructions.get_tail();
+      if (get_jump_strength(ir_last) == strength_continue) {
+         ir_last->remove();
+      }
+
+      /* If the loop ends in an unconditional return, and we are
+       * lowering returns, lower it.
+       */
+      if (this->function.lower_return)
+         lower_return_unconditionally(ir_last);
+
      if(body.min_strength >= strength_break) {
         /* FINISHME: turn the this->loop into an if, or replace it with its body */
      }

      if(this->loop.break_flag) {
+         /* We only get here if we are lowering breaks */
+         assert (lower_break);
+
+         /* If a break flag was generated while visiting the body of
+          * the loop, then at least one break was lowered, so we need
+          * to generate an if statement at the end of the loop that
+          * does a "break" if the break flag is set.  The break we
+          * generate won't violate the CONTAINED_JUMPS_LOWERED
+          * postcondition, because should_lower_jump() always returns
+          * false for a break that happens at the end of a loop.
+          *
+          * However, if the loop already ends in a conditional or
+          * unconditional break, then we need to lower that break,
+          * because it won't be at the end of the loop anymore.
+          */
+         lower_final_breaks(&ir->body_instructions);
+
         ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
         break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
         ir->body_instructions.push_tail(break_if);
@@ -530,14 +660,34 @@ lower_continue:
      assert(!this->function.signature);
      assert(!this->loop.loop);

+      bool lower_return;
+      if (strcmp(ir->function_name(), "main") == 0)
+         lower_return = lower_main_return;
+      else
+         lower_return = lower_sub_return;
+
      function_record saved_function = this->function;
      loop_record saved_loop = this->loop;
-      this->function = function_record(ir);
+      this->function = function_record(ir, lower_return);
      this->loop = loop_record(ir);

      assert(!this->loop.loop);
      visit_block(&ir->body);

+      /* If the body ended in an unconditional return of non-void,
+       * then we don't need to lower it because it's the one canonical
+       * return.
+       *
+       * If the body ended in a return of void, eliminate it because
+       * it is redundant.
+       */
+      if (ir->return_type->is_void() &&
+          get_jump_strength((ir_instruction *) ir->body.get_tail())) {
+         ir_jump *jump = (ir_jump *) ir->body.get_tail();
+         assert (jump->ir_type == ir_type_return);
+         jump->remove();
+      }
+
      if(this->function.return_value)
         ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));

--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -29,6 +29,21 @@
 *
 * Pre-DX10 GPUs often don't have a native way to do this operation,
 * and this works around that.
+ *
+ * The lowering process proceeds as follows.  Each non-constant index
+ * found in an r-value is converted to a canonical form \c array[i].  Each
+ * element of the array is conditionally assigned to a temporary by comparing
+ * \c i to a constant index.  This is done by cloning the canonical form and
+ * replacing all occurrences of \c i with a constant.  Each remaining occurrence
+ * of the canonical form in the IR is replaced with a dereference of the
+ * temporary variable.
+ *
+ * L-values with non-constant indices are handled similarly.  In this case,
+ * the RHS of the assignment is assigned to a temporary.  The non-constant
+ * index is replaced with the canonical form (just like for r-values).  The
+ * temporary is conditionally assigned to each element of the canonical form
+ * by comparing \c i with each index.  The same clone-and-replace scheme is
+ * used.
 */

 #include "ir.h"
@@ -37,10 +52,76 @@
 #include "glsl_types.h"
 #include "main/macros.h"

+static inline bool
+is_array_or_matrix(const ir_instruction *ir)
+{
+   return (ir->type->is_array() || ir->type->is_matrix());
+}
+
+/**
+ * Replace a dereference of a variable with a specified r-value
+ *
+ * Each time a dereference of the specified value is replaced, the r-value
+ * tree is cloned.
+ */
+class deref_replacer : public ir_rvalue_visitor {
+public:
+   deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
+      : variable_to_replace(variable_to_replace), value(value),
+	progress(false)
+   {
+      assert(this->variable_to_replace != NULL);
+      assert(this->value != NULL);
+   }
+
+   virtual void handle_rvalue(ir_rvalue **rvalue)
+   {
+      ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
+
+      if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
+	 this->progress = true;
+	 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
+      }
+   }
+
+   const ir_variable *variable_to_replace;
+   ir_rvalue *value;
+   bool progress;
+};
+
+/**
+ * Find a variable index dereference of an array in an rvalue tree
+ */
+class find_variable_index : public ir_hierarchical_visitor {
+public:
+   find_variable_index()
+      : deref(NULL)
+   {
+      /* empty */
+   }
+
+   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
+   {
+      if (is_array_or_matrix(ir->array)
+	  && (ir->array_index->as_constant() == NULL)) {
+	 this->deref = ir;
+	 return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   /**
+    * First array dereference found in the tree that has a non-constant index.
+    */
+   ir_dereference_array *deref;
+};
+
 struct assignment_generator
 {
   ir_instruction* base_ir;
-   ir_rvalue* array;
+   ir_dereference *rvalue;
+   ir_variable *old_index;
   bool is_write;
   unsigned int write_mask;
   ir_variable* var;
@@ -55,18 +136,23 @@ struct assignment_generator
       * underlying variable.
       */
      void *mem_ctx = ralloc_parent(base_ir);
-      ir_dereference *element =
-	 new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL),
-					   new(mem_ctx) ir_constant(i));
-      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);

-      ir_assignment *assignment;
-      if (is_write) {
-	 assignment = new(mem_ctx) ir_assignment(element, variable, condition,
-						 write_mask);
-      } else {
-	 assignment = new(mem_ctx) ir_assignment(variable, element, condition);
-      }
+      /* Clone the old r-value in its entirety.  Then replace any occurrences of
+       * the old variable index with the new constant index.
+       */
+      ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
+      ir_constant *const index = new(mem_ctx) ir_constant(i);
+      deref_replacer r(this->old_index, index);
+      element->accept(&r);
+      assert(r.progress);
+
+      /* Generate a conditional assignment to (or from) the constant indexed
+       * array dereference.
+       */
+      ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
+      ir_assignment *const assignment = (is_write)
+	 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
+	 : new(mem_ctx) ir_assignment(variable, element, condition);

      list->push_tail(assignment);
   }
@@ -233,21 +319,18 @@ public:
   bool lower_temps;
   bool lower_uniforms;

-   bool is_array_or_matrix(const ir_instruction *ir) const
+   bool storage_type_needs_lowering(ir_dereference_array *deref) const
   {
-      return (ir->type->is_array() || ir->type->is_matrix());
-   }
-
-   bool needs_lowering(ir_dereference_array *deref) const
-   {
-      if (deref == NULL || deref->array_index->as_constant()
-	  || !is_array_or_matrix(deref->array))
-	 return false;
-
-      if (deref->array->ir_type == ir_type_constant)
+      /* If a variable isn't eventually the target of this dereference, then
+       * it must be a constant or some sort of anonymous temporary storage.
+       *
+       * FINISHME: Is this correct?  Most drivers treat arrays of constants as
+       * FINISHME: uniforms.  It seems like this should do the same.
+       */
+      const ir_variable *const var = deref->array->variable_referenced();
+      if (var == NULL)
 	 return this->lower_temps;

-      const ir_variable *const var = deref->array->variable_referenced();
      switch (var->mode) {
      case ir_var_auto:
      case ir_var_temporary:
@@ -267,8 +350,18 @@ public:
      return false;
   }

+   bool needs_lowering(ir_dereference_array *deref) const
+   {
+      if (deref == NULL || deref->array_index->as_constant()
+	  || !is_array_or_matrix(deref->array))
+	 return false;
+
+      return this->storage_type_needs_lowering(deref);
+   }
+
   ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
-					  ir_assignment* orig_assign)
+					  ir_assignment* orig_assign,
+					  ir_dereference *orig_base)
   {
      assert(is_array_or_matrix(orig_deref->array));

@@ -314,9 +407,12 @@ public:
 	 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
      base_ir->insert_before(assign);

+      orig_deref->array_index = lhs->clone(mem_ctx, NULL);
+
      assignment_generator ag;
-      ag.array = orig_deref->array;
+      ag.rvalue = orig_base;
      ag.base_ir = base_ir;
+      ag.old_index = index;
      ag.var = var;
      if (orig_assign) {
 	 ag.is_write = true;
@@ -327,21 +423,40 @@ public:

      switch_generator sg(ag, index, 4, 4);

-      exec_list list;
-      sg.generate(0, length, &list);
-      base_ir->insert_before(&list);
+      /* If the original assignment has a condition, respect that original
+       * condition!  This is accomplished by wrapping the new conditional
+       * assignments in an if-statement that uses the original condition.
+       */
+      if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
+	 /* No need to clone the condition because the IR that it hangs on is
+	  * going to be removed from the instruction sequence.
+	  */
+	 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
+
+	 sg.generate(0, length, &if_stmt->then_instructions);
+	 base_ir->insert_before(if_stmt);
+      } else {
+	 exec_list list;
+
+	 sg.generate(0, length, &list);
+	 base_ir->insert_before(&list);
+      }

      return var;
   }

   virtual void handle_rvalue(ir_rvalue **pir)
   {
+      if (this->in_assignee)
+	 return;
+
      if (!*pir)
         return;

      ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
      if (needs_lowering(orig_deref)) {
-         ir_variable* var = convert_dereference_array(orig_deref, 0);
+         ir_variable *var =
+	    convert_dereference_array(orig_deref, NULL, orig_deref);
         assert(var);
         *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
         this->progress = true;
@@ -353,10 +468,11 @@ public:
   {
      ir_rvalue_visitor::visit_leave(ir);

-      ir_dereference_array *orig_deref = ir->lhs->as_dereference_array();
+      find_variable_index f;
+      ir->lhs->accept(&f);

-      if (needs_lowering(orig_deref)) {
-         convert_dereference_array(orig_deref, ir);
+      if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
+         convert_dereference_array(f.deref, ir, ir->lhs);
         ir->remove();
         this->progress = true;
      }
@@ -377,7 +493,17 @@ lower_variable_index_to_cond_assign(exec_list *instructions,
 					   lower_temp,
 					   lower_uniform);

-   visit_list_elements(&v, instructions);
+   /* Continue lowering until no progress is made.  If there are multiple
+    * levels of indirection (e.g., non-constant indexing of array elements and
+    * matrix columns of an array of matrices), each pass will only lower one
+    * level of indirection.
+    */
+   bool progress_ever = false;
+   do {
+      v.progress = false;
+      visit_list_elements(&v, instructions);
+      progress_ever = v.progress || progress_ever;
+   } while (v.progress);

-   return v.progress;
+   return progress_ever;
 }
--- a/src/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/glsl/lower_vec_index_to_cond_assign.cpp
@@ -171,21 +171,23 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir)

   assert(orig_deref->array_index->type->base_type == GLSL_TYPE_INT);

+   exec_list list;
+
   /* Store the index to a temporary to avoid reusing its tree. */
   index = new(ir) ir_variable(glsl_type::int_type, "vec_index_tmp_i",
 			       ir_var_temporary);
-   ir->insert_before(index);
+   list.push_tail(index);
   deref = new(ir) ir_dereference_variable(index);
   assign = new(ir) ir_assignment(deref, orig_deref->array_index, NULL);
-   ir->insert_before(assign);
+   list.push_tail(assign);

   /* Store the RHS to a temporary to avoid reusing its tree. */
   var = new(ir) ir_variable(ir->rhs->type, "vec_index_tmp_v",
 			     ir_var_temporary);
-   ir->insert_before(var);
+   list.push_tail(var);
   deref = new(ir) ir_dereference_variable(var);
   assign = new(ir) ir_assignment(deref, ir->rhs, NULL);
-   ir->insert_before(assign);
+   list.push_tail(assign);

   /* Generate a conditional move of each vector element to the temp. */
   for (i = 0; i < orig_deref->array->type->vector_elements; i++) {
@@ -205,8 +207,25 @@ ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir)

      deref = new(ir) ir_dereference_variable(var);
      assign = new(ir) ir_assignment(swizzle, deref, condition);
-      ir->insert_before(assign);
+      list.push_tail(assign);
   }
+
+   /* If the original assignment has a condition, respect that original
+    * condition!  This is accomplished by wrapping the new conditional
+    * assignments in an if-statement that uses the original condition.
+    */
+   if (ir->condition != NULL) {
+      /* No need to clone the condition because the IR that it hangs on is
+       * going to be removed from the instruction sequence.
+       */
+      ir_if *if_stmt = new(mem_ctx) ir_if(ir->condition);
+
+      list.move_nodes_to(&if_stmt->then_instructions);
+      ir->insert_before(if_stmt);
+   } else {
+      ir->insert_before(&list);
+   }
+
   ir->remove();

   this->progress = true;
--- a/src/glsl/main.cpp
+++ b/src/glsl/main.cpp
@@ -275,6 +275,7 @@ main(int argc, char **argv)

   whole_program = rzalloc (NULL, struct gl_shader_program);
   assert(whole_program != NULL);
+   whole_program->InfoLog = ralloc_strdup(whole_program, "");

   for (/* empty */; argc > optind; optind++) {
      whole_program->Shaders =
--- a/src/glsl/opt_dead_functions.cpp
+++ b/src/glsl/opt_dead_functions.cpp
@@ -50,7 +50,6 @@ public:
   ir_dead_functions_visitor()
   {
      this->mem_ctx = ralloc_context(NULL);
-      this->seen_another_function_signature = false;
   }

   ~ir_dead_functions_visitor()
@@ -65,8 +64,6 @@ public:

   bool (*predicate)(ir_instruction *ir);

-   bool seen_another_function_signature;
-
   /* List of signature_entry */
   exec_list signature_list;
   void *mem_ctx;
@@ -97,13 +94,7 @@ ir_dead_functions_visitor::visit_enter(ir_function_signature *ir)
      entry->used = true;
   }

-   /* If this is the first signature to look at, no need to descend to see
-    * if it has calls to another function signature.
-    */
-   if (!this->seen_another_function_signature) {
-      this->seen_another_function_signature = true;
-      return visit_continue_with_parent;
-   }
+

   return visit_continue;
 }
--- a/src/glsl/program.h
+++ b/src/glsl/program.h
@@ -25,3 +25,11 @@

 extern void
 link_shaders(struct gl_context *ctx, struct gl_shader_program *prog);
+
+extern void
+linker_error(gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);
+
+extern void
+linker_warning(gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);
--- a/src/glsl/s_expression.cpp
+++ b/src/glsl/s_expression.cpp
@@ -25,10 +25,13 @@
 #include <assert.h>
 #include "s_expression.h"

-s_symbol::s_symbol(const char *tmp, size_t n)
+s_symbol::s_symbol(const char *str, size_t n)
 {
-   this->str = ralloc_strndup (this, tmp, n);
-   assert(this->str != NULL);
+   /* Assume the given string is already nul-terminated and in memory that
+    * will live as long as this node.
+    */
+   assert(str[n] == '\0');
+   this->str = str;
 }

 s_list::s_list()
@@ -36,22 +39,26 @@ s_list::s_list()
 }

 static void
-skip_whitespace(const char *& src)
+skip_whitespace(const char *&src, char *&symbol_buffer)
 {
-   src += strspn(src, " \v\t\r\n");
+   size_t n = strspn(src, " \v\t\r\n");
+   src += n;
+   symbol_buffer += n;
   /* Also skip Scheme-style comments: semi-colon 'til end of line */
   if (src[0] == ';') {
-      src += strcspn(src, "\n");
-      skip_whitespace(src);
+      n = strcspn(src, "\n");
+      src += n;
+      symbol_buffer += n;
+      skip_whitespace(src, symbol_buffer);
   }
 }

 static s_expression *
-read_atom(void *ctx, const char *& src)
+read_atom(void *ctx, const char *&src, char *&symbol_buffer)
 {
   s_expression *expr = NULL;

-   skip_whitespace(src);
+   skip_whitespace(src, symbol_buffer);

   size_t n = strcspn(src, "( \v\t\r\n);");
   if (n == 0)
@@ -70,42 +77,63 @@ read_atom(void *ctx, const char *& src)
 	 expr = new(ctx) s_int(i);
   } else {
      // Not a number; return a symbol.
-      expr = new(ctx) s_symbol(src, n);
+      symbol_buffer[n] = '\0';
+      expr = new(ctx) s_symbol(symbol_buffer, n);
   }

   src += n;
+   symbol_buffer += n;

   return expr;
 }

+static s_expression *
+__read_expression(void *ctx, const char *&src, char *&symbol_buffer)
+{
+   s_expression *atom = read_atom(ctx, src, symbol_buffer);
+   if (atom != NULL)
+      return atom;
+
+   skip_whitespace(src, symbol_buffer);
+   if (src[0] == '(') {
+      ++src;
+      ++symbol_buffer;
+
+      s_list *list = new(ctx) s_list;
+      s_expression *expr;
+
+      while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) {
+	 list->subexpressions.push_tail(expr);
+      }
+      skip_whitespace(src, symbol_buffer);
+      if (src[0] != ')') {
+	 printf("Unclosed expression (check your parenthesis).\n");
+	 return NULL;
+      }
+      ++src;
+      ++symbol_buffer;
+      return list;
+   }
+   return NULL;
+}
+
 s_expression *
 s_expression::read_expression(void *ctx, const char *&src)
 {
   assert(src != NULL);

-   s_expression *atom = read_atom(ctx, src);
-   if (atom != NULL)
-      return atom;
-
-   skip_whitespace(src);
-   if (src[0] == '(') {
-      ++src;
-
-      s_list *list = new(ctx) s_list;
-      s_expression *expr;
-
-      while ((expr = read_expression(ctx, src)) != NULL) {
-	 list->subexpressions.push_tail(expr);
-      }
-      skip_whitespace(src);
-      if (src[0] != ')') {
-	 printf("Unclosed expression (check your parenthesis).\n");
-	 return NULL;
-      }
-      ++src;
-      return list;
-   }
-   return NULL;
+   /* When we encounter a Symbol, we need to save a nul-terminated copy of
+    * the string.  However, ralloc_strndup'ing every individual Symbol is
+    * extremely expensive.  We could avoid this by simply overwriting the
+    * next character (guaranteed to be whitespace, parens, or semicolon) with
+    * a nul-byte.  But overwriting non-whitespace would mess up parsing.
+    *
+    * So, just copy the whole buffer ahead of time.  Walk both, leaving the
+    * original source string unmodified, and altering the copy to contain the
+    * necessary nul-bytes whenever we encounter a symbol.
+    */
+   char *symbol_buffer = ralloc_strdup(ctx, src);
+   return __read_expression(ctx, src, symbol_buffer);
 }

 void s_int::print()
--- a/src/glsl/s_expression.h
+++ b/src/glsl/s_expression.h
@@ -129,7 +129,7 @@ public:
   void print();

 private:
-   char *str;
+   const char *str;
 };

 /* Lists of expressions: (expr1 ... exprN) */
--- a/src/glx/apple/apple_glapi.c
+++ b/src/glx/apple/apple_glapi.c
@@ -49,11 +49,9 @@
 struct _glapi_table * __ogl_framework_api = NULL;
 struct _glapi_table * __applegl_api = NULL;

-void apple_glapi_set_dispatch(void) {
-    if(__applegl_api)  {
-        _glapi_set_dispatch(__applegl_api);
+static void _apple_glapi_create_table(void) {
+    if (__applegl_api)
        return;
-    }

    __ogl_framework_api = _glapi_create_table_from_handle(apple_cgl_get_dl_handle(), "gl");
    assert(__ogl_framework_api);
@@ -68,6 +66,15 @@ void apple_glapi_set_dispatch(void) {
    SET_DrawBuffer(__applegl_api, __applegl_glDrawBuffer);
    SET_DrawBuffersARB(__applegl_api, __applegl_glDrawBuffersARB);
    SET_Viewport(__applegl_api, __applegl_glViewport);
+}

+void apple_glapi_set_dispatch(void) {
+    _apple_glapi_create_table();
    _glapi_set_dispatch(__applegl_api);
 }
+
+void apple_glapi_oglfw_viewport_scissor(GLint x, GLint y, GLsizei width, GLsizei height) {
+    _apple_glapi_create_table();
+    __ogl_framework_api->Viewport(x, y, width, height);
+    __ogl_framework_api->Scissor(x, y, width, height);
+}
--- a/src/glx/apple/apple_glx.h
+++ b/src/glx/apple/apple_glx.h
@@ -46,5 +46,6 @@ void apple_glx_waitx(Display * dpy, void *ptr);
 int apple_get_dri_event_base(void);

 void apple_glapi_set_dispatch(void);
+void apple_glapi_oglfw_viewport_scissor(GLint x, GLint y, GLsizei width, GLsizei height);

 #endif
--- a/src/glx/apple/apple_glx_pbuffer.c
+++ b/src/glx/apple/apple_glx_pbuffer.c
@@ -84,8 +84,7 @@ pbuffer_make_current(struct apple_glx_context *ac,
   }

   if (!ac->made_current) {
-      glViewport(0, 0, pbuf->width, pbuf->height);
-      glScissor(0, 0, pbuf->width, pbuf->height);
+      apple_glapi_oglfw_viewport_scissor(0, 0, pbuf->width, pbuf->height);
      ac->made_current = true;
   }

--- a/src/glx/apple/apple_glx_pixmap.c
+++ b/src/glx/apple/apple_glx_pixmap.c
@@ -80,8 +80,7 @@ pixmap_make_current(struct apple_glx_context *ac,
   }

   if (!ac->made_current) {
-      glViewport(0, 0, p->width, p->height);
-      glScissor(0, 0, p->width, p->height);
+      apple_glapi_oglfw_viewport_scissor(0, 0, p->width, p->height);
      ac->made_current = true;
   }

--- a/src/glx/apple/apple_glx_surface.c
+++ b/src/glx/apple/apple_glx_surface.c
@@ -53,8 +53,7 @@ update_viewport_and_scissor(Display * dpy, GLXDrawable drawable)

   XGetGeometry(dpy, drawable, &root, &x, &y, &width, &height, &bd, &depth);

-   glViewport(0, 0, width, height);
-   glScissor(0, 0, width, height);
+   apple_glapi_oglfw_viewport_scissor(0, 0, width, height);
 }

 static bool
--- a/src/glx/apple/appledri.c
+++ b/src/glx/apple/appledri.c
@@ -332,12 +332,12 @@ XAppleDRICreateSharedBuffer(Display * dpy, int screen, Drawable drawable,
      return False;
   }

-   printf("rep.stringLength %d\n", (int) rep.stringLength);
+   /* printf("rep.stringLength %d\n", (int) rep.stringLength); */

   if (rep.stringLength > 0 && rep.stringLength <= pathlen) {
      _XReadPad(dpy, path, rep.stringLength);

-      printf("path: %s\n", path);
+      /* printf("path: %s\n", path); */

      *width = rep.width;
      *height = rep.height;
@@ -404,7 +404,7 @@ XAppleDRICreatePixmap(Display * dpy, int screen, Drawable drawable,
   if (rep.stringLength > 0 && rep.stringLength <= bufnamesize) {
      _XReadPad(dpy, bufname, rep.stringLength);

-      printf("path: %s\n", bufname);
+      /* printf("path: %s\n", bufname); */

      *width = rep.width;
      *height = rep.height;
--- a/src/glx/applegl_glx.c
+++ b/src/glx/applegl_glx.c
@@ -69,6 +69,24 @@ applegl_bind_context(struct glx_context *gc, struct glx_context *old,
 static void
 applegl_unbind_context(struct glx_context *gc, struct glx_context *new)
 {
+   Display *dpy;
+   bool error;
+
+   /* If we don't have a context, then we have nothing to unbind */
+   if (!gc)
+      return;
+
+   /* If we have a new context, keep this one around and remove it during bind. */
+   if (new)
+      return;
+
+   dpy = gc->psc->dpy;
+
+   error = apple_glx_make_current_context(dpy,
+					  (gc != &dummyContext) ? gc->driContext : NULL,
+					  NULL, None);
+
+   apple_glx_diagnostic("%s: error %s\n", __func__, error ? "YES" : "NO");
 }

 static void
--- a/src/glx/dri2.c
+++ b/src/glx/dri2.c
@@ -180,6 +180,15 @@ DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code)
 	err->minorCode == X_DRI2DestroyDrawable)
 	return True;

+    /* If the server is non-local DRI2Connect will raise BadRequest.
+     * Swallow this so that DRI2Connect can signal this in its return code */
+    if (err->majorCode == codes->major_opcode &&
+        err->minorCode == X_DRI2Connect &&
+        err->errorCode == BadRequest) {
+	*ret_code = False;
+	return True;
+    }
+
    return False;
 }

--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -539,6 +539,15 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
 	(struct dri2_display *)dpyPriv->dri2Display;
    CARD64 ret = 0;

+    /* Check we have the right attachments */
+    if (!priv->have_back)
+	return ret;
+
+    /* Old servers can't handle swapbuffers */
+    if (!pdp->swapAvailable) {
+       dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
+    } else {
+#ifdef X_DRI2SwapBuffers
 #ifdef __DRI2_FLUSH
    if (psc->f) {
       struct glx_context *gc = __glXGetCurrentContext();
@@ -549,21 +558,15 @@ dri2SwapBuffers(__GLXDRIdrawable *pdraw, int64_t target_msc, int64_t divisor,
    }
 #endif

+       DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable,
+		       target_msc, divisor, remainder, &ret);
+#endif
+    }
+
    /* Old servers don't send invalidate events */
    if (!pdp->invalidateAvailable)
       dri2InvalidateBuffers(dpyPriv->dpy, pdraw->xDrawable);

-    /* Old servers can't handle swapbuffers */
-    if (!pdp->swapAvailable) {
-       dri2CopySubBuffer(pdraw, 0, 0, priv->width, priv->height);
-       return 0;
-    }
-
-#ifdef X_DRI2SwapBuffers
-    DRI2SwapBuffers(psc->base.dpy, pdraw->xDrawable, target_msc, divisor,
-		    remainder, &ret);
-#endif
-
    return ret;
 }

--- a/src/glx/drisw_glx.c
+++ b/src/glx/drisw_glx.c
@@ -100,6 +100,13 @@ XCreateDrawable(struct drisw_drawable * pdp,
                              32,                     /* bitmap_pad */
                              0);                     /* bytes_per_line */

+  /**
+   * swrast does not handle 24-bit depth with 24 bpp, so let X do the
+   * conversion for us.
+   */
+  if (pdp->ximage->bits_per_pixel == 24)
+     pdp->ximage->bits_per_pixel = 32;
+
   return True;
 }

@@ -362,10 +369,6 @@ driswCreateDrawable(struct glx_screen *base, XID xDrawable,

   const __DRIswrastExtension *swrast = psc->swrast;

-   /* Old dri can't handle GLX 1.3+ drawable constructors. */
-   if (xDrawable != drawable)
-      return NULL;
-
   pdp = Xmalloc(sizeof(*pdp));
   if (!pdp)
      return NULL;
--- a/src/glx/glxextensions.c
+++ b/src/glx/glxextensions.c
@@ -139,7 +139,7 @@ static const struct extension_info known_glx_extensions[] = {
   { GLX(SGIX_visual_select_group),    VER(0,0), Y, Y, N, N },
   { GLX(EXT_texture_from_pixmap),     VER(0,0), Y, N, N, N },
 #endif
-   { GLX(INTEL_swap_event),            VER(1,4), Y, Y, N, N },
+   { GLX(INTEL_swap_event),            VER(1,4), Y, N, N, N },
   { NULL }
 };

--- a/Show More
+++ b/Show More