i965: Fix intel_miptree_is_fast_clear_capable()

There are three types of fast clears: a. fast depth clears b. fast singlesample color clears c. fast multisample color clears Function intel_miptree_is_fast_clear_capable() checks if a miptree supports fast clears of type (b). Rename the function to disambiguate what it does: old: intel_miptree_is_fast_clear_capable new: intel_miptree_supports_non_msrt_fast_clear The functionally accidentally rejected multisampled color surfaces because it thought they were singlesample array surfaces. Fix that by explicitly rejecting surfaces with samples > 1. This fix would have been needed before we enabled layered fast singlesample color clears (introduced in gen8), which we want to do eventually. For now, though, this patch changes no behavior; it just fixes how the driver chooses its behavior. Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
i965/mt: Declare some functions as static
2015-10-05 11:14:04 -07:00 · 2015-10-05 11:10:11 -07:00 · 2015-10-05 13:50:15 +02:00 · 2015-10-05 14:44:24 +03:00 · 2015-10-05 08:19:34 +02:00 · 2015-10-05 08:12:17 +02:00
830 changed files with 56332 additions and 24038 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -42,6 +42,7 @@ LOCAL_CFLAGS += \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

 LOCAL_CFLAGS += \
+	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
 	-DHAVE___BUILTIN_FFSLL \
@@ -70,7 +71,7 @@ endif

 ifeq ($(MESA_ENABLE_LLVM),true)
 LOCAL_CFLAGS += \
-	-DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
+	-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
 	-D__STDC_CONSTANT_MACROS \
 	-D__STDC_FORMAT_MACROS \
 	-D__STDC_LIMIT_MACROS
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.0-devel
+11.1.0-devel
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.62
-LIBDRM_FREEDRENO_REQUIRED=2.4.64
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
@@ -533,15 +533,32 @@ AM_CONDITIONAL(HAVE_COMPAT_SYMLINKS, test "x$HAVE_COMPAT_SYMLINKS" = xyes)
 dnl
 dnl library names
 dnl
+dnl Unfortunately we need to do a few things that libtool can't help us with,
+dnl so we need some knowledge of shared library filenames:
+dnl
+dnl LIB_EXT is the extension used when creating symlinks for alternate
+dnl filenames for a shared library which will be dynamically loaded
+dnl
+dnl IMP_LIB_EXT is the extension used when checking for the presence of a
+dnl the file for a shared library we wish to link with
+dnl
 case "$host_os" in
 darwin* )
-    LIB_EXT='dylib' ;;
+    LIB_EXT='dylib'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 cygwin* )
-    LIB_EXT='dll' ;;
+    LIB_EXT='dll'
+    IMP_LIB_EXT='dll.a'
+    ;;
 aix* )
-    LIB_EXT='a' ;;
+    LIB_EXT='a'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 * )
-    LIB_EXT='so' ;;
+    LIB_EXT='so'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 esac

 AC_SUBST([LIB_EXT])
@@ -1110,6 +1127,11 @@ AC_MSG_RESULT([$with_sha1])
 AC_SUBST(SHA1_LIBS)
 AC_SUBST(SHA1_CFLAGS)

+# Enable a define for SHA1
+if test "x$with_sha1" != "x"; then
+	DEFINES="$DEFINES -DHAVE_SHA1"
+fi
+
 # Allow user to configure out the shader-cache feature
 AC_ARG_ENABLE([shader-cache],
    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
@@ -1289,6 +1311,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
      [DEFINES="${DEFINES} -DGLX_USE_TLS"])

+dnl Read-only text section on x86 hardened platforms
+AC_ARG_ENABLE([glx-read-only-text],
+    [AS_HELP_STRING([--enable-glx-read-only-text],
+        [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+    [enable_glx_read_only_text="$enableval"],
+    [enable_glx_read_only_text=no])
+if test "x$enable_glx_read_only_text" = xyes; then
+    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+fi
+
 dnl
 dnl More DRI setup
 dnl
@@ -2051,7 +2083,7 @@ radeon_llvm_check() {
    if test "x$enable_gallium_llvm" != "xyes"; then
        AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
    fi
-    llvm_check_version_for "3" "4" "2" $1 
+    llvm_check_version_for "3" "5" "0" $1
    if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
        AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
    fi
@@ -2139,11 +2171,8 @@ if test -n "$with_gallium_drivers"; then
            gallium_require_drm "vc4"
            gallium_require_drm_loader

-            case "$host_cpu" in
-                i?86 | x86_64 | amd64)
-                USE_VC4_SIMULATOR=yes
-                ;;
-            esac
+            PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+                              [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
            ;;
        *)
            AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@ -2163,10 +2192,14 @@ if test "x$MESA_LLVM" != x0; then

    LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"

+    dnl llvm-config may not give the right answer when llvm is a built as a
+    dnl single shared library, so we must work the library name out for
+    dnl ourselves.
+    dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
    if test "x$enable_llvm_shared_libs" = xyes; then
        dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
        LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])

        if test "x$llvm_have_one_so" = xyes; then
            dnl LLVM was built using auto*, so there is only one shared object.
@@ -2174,7 +2207,7 @@ if test "x$MESA_LLVM" != x0; then
        else
            dnl If LLVM was built with CMake, there will be one shared object per
            dnl component.
-            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
                    [AC_MSG_ERROR([Could not find llvm shared libraries:
 	Please make sure you have built llvm with the --enable-shared option
 	and that your llvm libraries are installed in $LLVM_LIBDIR
@@ -2317,6 +2350,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/auxiliary/Makefile
 		src/gallium/auxiliary/pipe-loader/Makefile
 		src/gallium/drivers/freedreno/Makefile
+		src/gallium/drivers/ddebug/Makefile
 		src/gallium/drivers/i915/Makefile
 		src/gallium/drivers/ilo/Makefile
 		src/gallium/drivers/llvmpipe/Makefile
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,14 +109,14 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
  - Enhanced per-sample shading                        DONE (r600)
  - Interpolation functions                            DONE (r600)
  - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (r600, llvmpipe, softpipe)
  GL_ARB_sample_shading                                DONE (i965, nv50, r600)
  GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_tessellation_shader                           DONE ()
  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, r600, llvmpipe, softpipe)
  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_texture_gather                                DONE (i965, nv50, r600, llvmpipe, softpipe)
-  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600)
+  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600, softpipe)
  GL_ARB_transform_feedback2                           DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_transform_feedback3                           DONE (i965, nv50, r600, llvmpipe, softpipe)

@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
  GL_ARB_get_program_binary                            DONE (0 binary formats)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
+  GL_ARB_vertex_attrib_64bit                           DONE (r600, llvmpipe, softpipe)
  GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)


@@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
  GL_ARB_program_interface_query                       DONE (all drivers)
  GL_ARB_robust_buffer_access_behavior                 not started
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
  GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
@@ -194,9 +194,9 @@ GL 4.5, GLSL 4.50:
  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
  GL_ARB_direct_state_access                           DONE (all drivers)
  GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  not started
-  GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
-  GL_KHR_context_flush_control                         DONE (all - but needs GLX/EXT extension to be useful)
+  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_ARB_texture_barrier                               DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
  GL_KHR_robust_buffer_access_behavior                 not started
  GL_KHR_robustness                                    90% done (the ARB variant)
  GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)
@@ -212,7 +212,7 @@ GLES3.1, GLSL ES 3.1
  GL_ARB_shader_atomic_counters                        DONE (i965)
  GL_ARB_shader_image_load_store                       DONE (i965)
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_shading_language_packing                      DONE (all drivers)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -223,10 +223,35 @@ GLES3.1, GLSL ES 3.1
  GS5 Packing/bitfield/conversion functions            DONE (i965, nvc0, r600, radeonsi)
  GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)

-  Additional functions not covered above:
-      glMemoryBarrierByRegion
-      glGetTexLevelParameter[fi]v - needs updates to restrict to GLES enums
-      glGetBooleani_v - needs updates to restrict to GLES enums
+  Additional functionality not covered above:
+      glMemoryBarrierByRegion                          DONE
+      glGetTexLevelParameter[fi]v - needs updates      DONE
+      glGetBooleani_v - restrict to GLES enums
+      gl_HelperInvocation support
+
+GLES3.2, GLSL ES 3.2
+  GL_EXT_color_buffer_float                            DONE (all drivers)
+  GL_KHR_blend_equation_advanced                       not started
+  GL_KHR_debug                                         DONE (all drivers)
+  GL_KHR_robustness                                    90% done (the ARB variant)
+  GL_KHR_texture_compression_astc_ldr                  DONE (i965/gen9+)
+  GL_OES_copy_image                                    not started (based on GL_ARB_copy_image, which is done for some drivers)
+  GL_OES_draw_buffers_indexed                          not started
+  GL_OES_draw_elements_base_vertex                     not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers)
+  GL_OES_geometry_shader                               not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
+  GL_OES_gpu_shader5                                   not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
+  GL_OES_primitive_bounding box                        not started
+  GL_OES_sample_shading                                not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_sample_variables                              not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_shader_image_atomic                           not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
+  GL_OES_shader_io_blocks                              not started (based on parts of GLSL 1.50, which is done)
+  GL_OES_shader_multisample_interpolation              not started (based on parts of GL_ARB_gpu_shader5, which is done)
+  GL_OES_tessellation_shader                           not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
+  GL_OES_texture_border_clamp                          not started (based on GL_ARB_texture_border_clamp, which is done)
+  GL_OES_texture_buffer                                not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
+  GL_OES_texture_cube_map_array                        not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
+  GL_OES_texture_stencil8                              not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
+  GL_OES_texture_storage_multisample_2d_array          DONE (all drivers that support GL_ARB_texture_multisample)

 More info about these features and the work involved can be found at
 http://dri.freedesktop.org/wiki/MissingFunctionality
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -87,6 +87,13 @@ created in a <code>lib64</code> directory at the top of the Mesa source
 tree.</p>
 </dd>

+<dt><code>--sysconfdir=DIR</code></dt>
+<dd><p>This option specifies the directory where the configuration
+files will be installed. The default is <code>${prefix}/etc</code>.
+Currently there's only one config file provided when dri drivers are
+enabled - it's <code>drirc</code>.</p>
+</dd>
+
 <dt><code>--enable-static, --disable-shared</code></dt>
 <dd><p>By default, Mesa
 will build shared libraries. Either of these options will force static
@@ -217,7 +224,7 @@ GLX.
 <dt><code>--with-expat=DIR</code>
 <dd><p><strong>DEPRECATED</strong>, use <code>PKG_CONFIG_PATH</code> instead.</p>
 <p>The DRI-enabled libGL uses expat to
-parse the DRI configuration files in <code>/etc/drirc</code> and
+parse the DRI configuration files in <code>${sysconfdir}/drirc</code> and
 <code>~/.drirc</code>. This option allows a specific expat installation
 to be used. For example, <code>--with-expat=/usr/local</code> will
 search for expat headers and libraries in <code>/usr/local/include</code>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -153,6 +153,7 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
   <li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
   <li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
   <li>nodualobj - suppress generation of dual-object geometry shader code</li>
+   <li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
 </ul>
 </ul>

--- a/docs/index.html
+++ b/docs/index.html
@@ -16,25 +16,72 @@

 <h1>News</h1>

-<h2>August 22 2015</h2>
+<h2>October 3, 2015</h2>
+<p>
+<a href="relnotes/10.6.9.html">Mesa 10.6.9</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6
+series. Users of 10.5 are encouraged to migrate to the 11.0 series in order
+to obtain future fixes.
+</p>
+
+<h2>September 28, 2015</h2>
+<p>
+<a href="relnotes/11.0.2.html">Mesa 11.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 26, 2015</h2>
+<p>
+<a href="relnotes/11.0.1.html">Mesa 11.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 20, 2015</h2>
+<p>
+<a href="relnotes/10.6.8.html">Mesa 10.6.8</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 12, 2015</h2>
+<p>
+<a href="relnotes/11.0.0.html">Mesa 11.0.0</a> is released.  This is a new
+development release.  See the release notes for more information about
+the release.
+</p>
+
+<h2>September 10, 2015</h2>
+<p>
+<a href="relnotes/10.6.7.html">Mesa 10.6.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 4, 2015</h2>
+<p>
+<a href="relnotes/10.6.6.html">Mesa 10.6.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 22, 2015</h2>
 <p>
 <a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released.
 This is a bug-fix release.
 </p>

-<h2>August 11 2015</h2>
+<h2>August 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released.
 This is a bug-fix release.
 </p>

-<h2>July 26 2015</h2>
+<h2>July 26, 2015</h2>
 <p>
 <a href="relnotes/10.6.3.html">Mesa 10.6.3</a> is released.
 This is a bug-fix release.
 </p>

-<h2>July 11 2015</h2>
+<h2>July 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.2.html">Mesa 10.6.2</a> is released.
 This is a bug-fix release.
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,13 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>
+<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>
+<li><a href="relnotes/11.0.1.html">11.0.1 release notes</a>
+<li><a href="relnotes/10.6.8.html">10.6.8 release notes</a>
+<li><a href="relnotes/11.0.0.html">11.0.0 release notes</a>
+<li><a href="relnotes/10.6.7.html">10.6.7 release notes</a>
+<li><a href="relnotes/10.6.6.html">10.6.6 release notes</a>
 <li><a href="relnotes/10.6.5.html">10.6.5 release notes</a>
 <li><a href="relnotes/10.6.4.html">10.6.4 release notes</a>
 <li><a href="relnotes/10.6.3.html">10.6.3 release notes</a>
--- a/docs/relnotes/10.6.6.html
+++ b/docs/relnotes/10.6.6.html
@@ -0,0 +1,164 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+416517aa9df4791f97d34451a9e4da33c966afcd18c115c5769b92b15b018ef5  mesa-10.6.6.tar.gz
+570f2154b7340ff5db61ff103bc6e85165b8958798b78a50fa2df488e98e5778  mesa-10.6.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+  <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+  <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+  <li>mesa/texgetimage: fix missing stencil check</li>
+  <li>st/readpixels: fix accel path for skipimages.</li>
+  <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+  <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+  <li>mesa: enable texture stencil8 for multisample</li>
+  <li>r600/sb: update last_cf for finalize if.</li>
+  <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+  <li>st/nine: Require gcc &gt;= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.5</li>
+  <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+  <li>r600g: Fix assert in tgsi_cmp</li>
+  <li>r600g/sb: Handle undef in read port tracker</li>
+  <li>r600g/sb: Don't read junk after EOP</li>
+  <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+  <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+  <li>nv50,nvc0: disable depth bounds test on blit</li>
+  <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+  <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix copy propagation type changes.</li>
+  <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+  <li>mesa: create multisample fallback textures like normal textures</li>
+  <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+  <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+  <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>mesa: update fbo state in glTexStorage</li>
+  <li>glsl: build stageref mask using IR, not symbol table</li>
+  <li>glsl: expose build_program_resource_list function</li>
+  <li>glsl: create program resource list after LinkShader</li>
+  <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.7.html
+++ b/docs/relnotes/10.6.7.html
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.7 Release Notes / September 10, 2015</h1>
+
+<p>
+Mesa 10.6.7 is a bug fix release which fixes bugs found since the 10.6.6 release.
+</p>
+<p>
+Mesa 10.6.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4ba10c59abee30d72476543a57afd2f33803dabf4620dc333b335d47966ff842  mesa-10.6.7.tar.gz
+feb1f640b915dada88a7c793dfaff0ae23580f8903f87a6b76469253de0d28d8  mesa-10.6.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/teximage: use correct extension for accept stencil texture.</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.6</li>
+  <li>Revert "i965: Momentarily pretend to support ARB_texture_stencil8 for blits."</li>
+  <li>Update version to 10.6.7</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Handle attribute aliasing in attribute storage limit check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.8.html
+++ b/docs/relnotes/10.6.8.html
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.8 Release Notes / September 20, 2015</h1>
+
+<p>
+Mesa 10.6.8 is a bug fix release which fixes bugs found since the 10.6.7 release.
+</p>
+<p>
+Mesa 10.6.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1f34dba2a8059782e3e4e0f18b9628004e253b2c69085f735b846d2e63c9e250  mesa-10.6.8.tar.gz
+e36ee5ceeadb3966fb5ce5b4cf18322dbb76a4f075558ae49c3bba94f57d58fd  mesa-10.6.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (1):</p>
+<ul>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Antia Puentes (1):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.7</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+</ul>
+
+<p>Hans de Goede (4):</p>
+<ul>
+  <li>nv30: Fix creation of scanout buffers</li>
+  <li>nv30: Implement color resolve for msaa</li>
+  <li>nv30: Fix max width / height checks in nv30 sifm code</li>
+  <li>nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type</li>
+  <li>mesa: Don't allow wrong type setters for matrix uniforms</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: don't fall back to 16F when 32F is requested</li>
+  <li>nvc0: always emit a full shader colormask</li>
+  <li>nvc0: remove BGRA4 format support</li>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+  <li>gallivm: Workaround LLVM PR23628.</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: convert double to long long instead of unsigned long long</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>gallivm: Do not use NoFramePointerElim with LLVM 3.7.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.9.html
+++ b/docs/relnotes/10.6.9.html
@@ -0,0 +1,130 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.9 Release Notes / Octover 03, 2015</h1>
+
+<p>
+Mesa 10.6.9 is a bug fix release which fixes bugs found since the 10.6.8 release.
+</p>
+<p>
+Mesa 10.6.9 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+3406876aac67546d0c3e2cb97da330b62644c313e7992b95618662e13c54296a  mesa-10.6.9.tar.gz
+b04c4de6280b863babc2929573da17218d92e9e4ba6272d548d135415723e8c3  mesa-10.6.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
+</ul>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>i965: Remove early release of DRI2 miptree</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.8</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>Update version to 10.6.9</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Kyle Brenneman (3):</p>
+<ul>
+  <li>glx: Fix build errors with --enable-mangling (v2)</li>
+  <li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
+  <li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: fix vui time_scale zero error</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>mesa: fix mipmap generation for immutable, compressed textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.0.html
+++ b/docs/relnotes/11.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 11.0.0 Release Notes / TBD</h1>
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>

 <p>
 Mesa 11.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3  mesa-11.0.0.tar.gz
+e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32  mesa-11.0.0.tar.xz
 </pre>


@@ -83,13 +84,175 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
 </ul>

+
 <h2>Bug fixes</h2>

-TBD.
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error ndefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean&#64;vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fais</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings &gt;max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50&#64;execution&#64;geometry&#64;max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error &quot;Unsupported or undefined LONG_BIT&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+<li>Removed the EGL loader from the Linux SCons build.</li>

 </div>
 </body>
--- a/docs/relnotes/11.0.1.html
+++ b/docs/relnotes/11.0.1.html
@@ -0,0 +1,134 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
+
+<p>
+Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
+</p>
+<p>
+Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6dab262877e12c0546a0e2970c6835a0f217e6d4026ccecb3cd5dd733d1ce867  mesa-11.0.1.tar.gz
+43d0dfcd1f1e36f07f8228cd76d90175d3fc74c1ed25d7071794a100a98ef2a6  mesa-11.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Antia Puentes (2):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+  <li>i965/vec4_nir: Load constants as integers</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.0</li>
+  <li>Update version to 11.0.1</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+  <li>freedreno/a3xx: fix blending of L8 format</li>
+  <li>nv50,nvc0: detect underlying resource changes and update tic</li>
+  <li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
+  <li>radeonsi: load fmask ptr relative to the resources array</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir: Fix a bunch of ralloc parenting errors</li>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Tapani Pälli (2):</p>
+<ul>
+  <li>mesa: fix errors when reading depth with glReadPixels</li>
+  <li>i965: fix textureGrad for cubemaps</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.2.html
+++ b/docs/relnotes/11.0.2.html
@@ -0,0 +1,85 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.2 Release Notes / September 28, 2015</h1>
+
+<p>
+Mesa 11.0.2 is a bug fix release which fixes bugs found since the 11.0.1 release.
+</p>
+<p>
+Mesa 11.0.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20  mesa-11.0.2.tar.gz
+fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4  mesa-11.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91582">Bug 91582</a> - [bisected] Regression in DEQP gles2.functional.negative_api.texture.texsubimage2d_neg_offset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+  <li>mesa: Fix order of format+type and internal format checks for glTexImageXD ops</li>
+  <li>mesa: Move _mesa_base_tex_format() from teximage to glformats files</li>
+  <li>mesa: Use the effective internal format instead for validation</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.1</li>
+  <li>Update version to 11.0.2</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -0,0 +1,65 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.1.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 11.1.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.1.1.
+</p>
+<p>
+Mesa 11.1.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_shader_storage_buffer_object on i965</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
+<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
+<li>GL_ARB_texture_query_lod on softpipe</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+TBD.
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -63,6 +63,20 @@ execution.  These are generally used for debugging.
 Example:  export MESA_GLSL=dump,nopt
 </p>

+<p>
+Shaders can be dumped and replaced on runtime for debugging purposes. Mesa 
+needs to be configured with '--with-sha1' to enable this functionality. This 
+feature is not currently supported by SCons build.
+
+This is controlled via following environment variables:
+<ul>
+<li><b>MESA_SHADER_DUMP_PATH</b> - path where shader sources are dumped
+<li><b>MESA_SHADER_READ_PATH</b> - path where replacement shaders are read
+</ul>
+Note, path set must exist before running for dumping or replacing to work. 
+When both are set, these paths should be different so the dumped shaders do 
+not clobber the replacement shaders.
+</p>

 <h2 id="support">GLSL Version</h2>

--- a/docs/vmware-guest.html
+++ b/docs/vmware-guest.html
@@ -26,6 +26,31 @@ VMware Workstation running on Linux or Windows and VMware Fusion running on
 MacOS are all supported.
 </p>

+<p>
+With the August 2015 Workstation 12 / Fusion 8 releases, OpenGL 3.3
+is supported in the guest.
+This requires:
+<ul>
+<li>The VM is configured for virtual hardware version 12.
+<li>The host OS, GPU and graphics driver supports DX11 (Windows) or
+    OpenGL 4.0 (Linux, Mac)
+<li>On Linux, the vmwgfx kernel module must be version 2.9.0 or later.
+<li>A recent version of Mesa with the updated svga gallium driver.
+</ul>
+</p>
+
+<p>
+Otherwise, OpenGL 2.1 is supported.
+</p>
+
+<p>
+OpenGL 3.3 support can be disabled by setting the environment variable
+SVGA_VGPU10=0.
+You will then have OpenGL 2.1 support.
+This may be useful to work around application bugs (such as incorrect use
+of the OpenGL 3.x core profile).
+</p>
+
 <p>
 Most modern Linux distros include the SVGA3D driver so end users shouldn't
 be concerned with this information.
@@ -227,6 +252,16 @@ If you don't see this, try setting this environment variable:
 then rerun glxinfo and examine the output for error messages.
 </p>

+<p>
+If OpenGL 3.3 is not working (you only get OpenGL 2.1):
+</p>
+<ul>
+<li>Make sure the VM uses hardware version 12.
+<li>Make sure the vmwgfx kernel module is version 2.9.0 or later.
+<li>Check the vmware.log file for errors.
+<li>Run 'dmesg | grep vmwgfx' and look for "DX: yes".
+
+
 </div>
 </body>
 </html>
--- a/include/c11/threads_posix.h
+++ b/include/c11/threads_posix.h
@@ -102,9 +102,8 @@ call_once(once_flag *flag, void (*func)(void))
 static inline int
 cnd_broadcast(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_broadcast(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_broadcast(cond) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.2
@@ -119,18 +118,16 @@ cnd_destroy(cnd_t *cond)
 static inline int
 cnd_init(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_init(cond, NULL);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_init(cond, NULL) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.4
 static inline int
 cnd_signal(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_signal(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_signal(cond) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.5
@@ -139,7 +136,8 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 {
    struct timespec abs_time;
    int rt;
-    if (!cond || !mtx || !xt) return thrd_error;
+    assert(mtx != NULL);
+    assert(cond != NULL);
    rt = pthread_cond_timedwait(cond, mtx, &abs_time);
    if (rt == ETIMEDOUT)
        return thrd_busy;
@@ -150,9 +148,9 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 static inline int
 cnd_wait(cnd_t *cond, mtx_t *mtx)
 {
-    if (!cond || !mtx) return thrd_error;
-    pthread_cond_wait(cond, mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    return (pthread_cond_wait(cond, mtx) == 0) ? thrd_success : thrd_error;
 }


@@ -161,7 +159,7 @@ cnd_wait(cnd_t *cond, mtx_t *mtx)
 static inline void
 mtx_destroy(mtx_t *mtx)
 {
-    assert(mtx);
+    assert(mtx != NULL);
    pthread_mutex_destroy(mtx);
 }

@@ -170,7 +168,7 @@ static inline int
 mtx_init(mtx_t *mtx, int type)
 {
    pthread_mutexattr_t attr;
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
    if (type != mtx_plain && type != mtx_timed && type != mtx_try
      && type != (mtx_plain|mtx_recursive)
      && type != (mtx_timed|mtx_recursive)
@@ -188,9 +186,8 @@ mtx_init(mtx_t *mtx, int type)
 static inline int
 mtx_lock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_lock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_lock(mtx) == 0) ? thrd_success : thrd_error;
 }

 static inline int
@@ -203,7 +200,9 @@ thrd_yield(void);
 static inline int
 mtx_timedlock(mtx_t *mtx, const xtime *xt)
 {
-    if (!mtx || !xt) return thrd_error;
+    assert(mtx != NULL);
+    assert(xt != NULL);
+
    {
 #ifdef EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
    struct timespec ts;
@@ -233,7 +232,7 @@ mtx_timedlock(mtx_t *mtx, const xtime *xt)
 static inline int
 mtx_trylock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
    return (pthread_mutex_trylock(mtx) == 0) ? thrd_success : thrd_busy;
 }

@@ -241,9 +240,8 @@ mtx_trylock(mtx_t *mtx)
 static inline int
 mtx_unlock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_unlock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_unlock(mtx) == 0) ? thrd_success : thrd_error;
 }


@@ -253,7 +251,7 @@ static inline int
 thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
 {
    struct impl_thrd_param *pack;
-    if (!thr) return thrd_error;
+    assert(thr != NULL);
    pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param));
    if (!pack) return thrd_nomem;
    pack->func = func;
@@ -329,7 +327,7 @@ thrd_yield(void)
 static inline int
 tss_create(tss_t *key, tss_dtor_t dtor)
 {
-    if (!key) return thrd_error;
+    assert(key != NULL);
    return (pthread_key_create(key, dtor) == 0) ? thrd_success : thrd_error;
 }

--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -62,6 +62,7 @@ AM_CPPFLAGS = \
 noinst_LTLIBRARIES = libglsl_util.la

 libglsl_util_la_SOURCES = \
+	glsl/shader_enums.c \
 	mesa/main/imports.c \
 	mesa/program/prog_hash_table.c \
 	mesa/program/symbol_table.c \
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -8,6 +8,7 @@ env = env.Clone()

 env.Append(CPPPATH = [
    '#/include',
+    '#/include/HaikuGL',
    '#/src/egl/main',
    '#/src',
 ])
@@ -15,7 +16,6 @@ env.Append(CPPPATH = [

 # parse Makefile.sources
 egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES'))

 env.Append(CPPDEFINES = [
    '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -27,6 +27,7 @@

 #define WL_HIDE_DEPRECATED

+#include <stdbool.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -588,7 +589,8 @@ dri2_setup_screen(_EGLDisplay *disp)
                                   __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
      disp->Extensions.KHR_gl_colorspace = EGL_TRUE;

-   if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
+   if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+       (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
      disp->Extensions.KHR_create_context = EGL_TRUE;

      if (dri2_dpy->robustness)
@@ -784,7 +786,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)

   if (dri2_dpy->own_dri_screen)
      dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
-   if (dri2_dpy->fd)
+   if (dri2_dpy->fd >= 0)
      close(dri2_dpy->fd);
   if (dri2_dpy->driver)
      dlclose(dri2_dpy->driver);
@@ -902,6 +904,55 @@ dri2_create_context_attribs_error(int dri_error)
   _eglError(egl_error, "dri2_create_context");
 }

+static bool
+dri2_fill_context_attribs(struct dri2_egl_context *dri2_ctx,
+                          struct dri2_egl_display *dri2_dpy,
+                          uint32_t *ctx_attribs,
+                          unsigned *num_attribs)
+{
+   int pos = 0;
+
+   assert(*num_attribs >= 8);
+
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMinorVersion;
+
+   if (dri2_ctx->base.Flags != 0) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it the robust-access flag.
+       * It may explode.  Instead, generate the required EGL error here.
+       */
+      if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
+            && !dri2_dpy->robustness) {
+         _eglError(EGL_BAD_MATCH, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_FLAGS;
+      ctx_attribs[pos++] = dri2_ctx->base.Flags;
+   }
+
+   if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it a reset strategy.  It may
+       * explode.  Instead, generate the required EGL error here.
+       */
+      if (!dri2_dpy->robustness) {
+         _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
+      ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
+   }
+
+   *num_attribs = pos;
+
+   return true;
+}
+
 /**
 * Called via eglCreateContext(), drv->API.CreateContext().
 */
@@ -987,44 +1038,12 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
   if (dri2_dpy->dri2) {
      if (dri2_dpy->dri2->base.version >= 3) {
         unsigned error;
-         unsigned num_attribs = 0;
+         unsigned num_attribs = 8;
         uint32_t ctx_attribs[8];

-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMajorVersion;
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMinorVersion;
-
-         if (dri2_ctx->base.Flags != 0) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it the robust-access flag.
-             * It may explode.  Instead, generate the required EGL error here.
-             */
-            if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
-                && !dri2_dpy->robustness) {
-               _eglError(EGL_BAD_MATCH, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_FLAGS;
-            ctx_attribs[num_attribs++] = dri2_ctx->base.Flags;
-         }
-
-         if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it a reset strategy.  It may
-             * explode.  Instead, generate the required EGL error here.
-             */
-            if (!dri2_dpy->robustness) {
-               _eglError(EGL_BAD_CONFIG, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
-            ctx_attribs[num_attribs++] = __DRI_CTX_RESET_LOSE_CONTEXT;
-         }
-
-         assert(num_attribs <= ARRAY_SIZE(ctx_attribs));
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;

 	 dri2_ctx->dri_context =
 	    dri2_dpy->dri2->createContextAttribs(dri2_dpy->dri_screen,
@@ -1046,12 +1065,33 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
      }
   } else {
      assert(dri2_dpy->swrast);
-      dri2_ctx->dri_context =
-         dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
-                                                  api,
-                                                  dri_config,
-                                                  shared,
-                                                  dri2_ctx);
+      if (dri2_dpy->swrast->base.version >= 3) {
+         unsigned error;
+         unsigned num_attribs = 8;
+         uint32_t ctx_attribs[8];
+
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
+
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createContextAttribs(dri2_dpy->dri_screen,
+                                                   api,
+                                                   dri_config,
+                                                   shared,
+                                                   num_attribs / 2,
+                                                   ctx_attribs,
+                                                   & error,
+                                                   dri2_ctx);
+         dri2_create_context_attribs_error(error);
+      } else {
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
+                                                     api,
+                                                     dri_config,
+                                                     shared,
+                                                     dri2_ctx);
+      }
   }

   if (!dri2_ctx->dri_context)
@@ -2384,13 +2424,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
   unsigned wait_flags = 0;
   EGLint ret = EGL_CONDITION_SATISFIED_KHR;

-   if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
+   /* The EGL_KHR_fence_sync spec states:
+    *
+    *    "If no context is current for the bound API,
+    *     the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored.
+    */
+   if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
      wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;

   /* the sync object should take a reference while waiting */
   dri2_egl_ref_sync(dri2_sync);

-   if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
+   if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
                                         dri2_sync->fence, wait_flags,
                                         timeout))
      dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -623,27 +623,19 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
      dri2_dpy->own_device = 1;
      gbm = gbm_create_device(fd);
      if (gbm == NULL)
-         return EGL_FALSE;
+         goto cleanup;
+   } else {
+      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
+      if (fd < 0)
+         goto cleanup;
   }

-   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
+   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+      goto cleanup;

   dri2_dpy->gbm_dri = gbm_dri_device(gbm);
-   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
-
-   if (fd < 0) {
-      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-      if (fd < 0) {
-         free(dri2_dpy);
-         return EGL_FALSE;
-      }
-   }
+   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+      goto cleanup;

   dri2_dpy->fd = fd;
   dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
@@ -727,4 +719,11 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
   dri2_dpy->vtbl = &dri2_drm_display_vtbl;

   return EGL_TRUE;
+
+cleanup:
+   if (fd >= 0)
+      close(fd);
+
+   free(dri2_dpy);
+   return EGL_FALSE;
 }
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1804,6 +1804,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
      goto cleanup_shm;

+   dri2_dpy->fd = -1;
   dri2_dpy->driver_name = strdup("swrast");
   if (!dri2_load_driver_swrast(disp))
      goto cleanup_shm;
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1161,6 +1161,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    * Every hardware driver_name is set using strdup. Doing the same in
    * here will allow is to simply free the memory at dri2_terminate().
    */
+   dri2_dpy->fd = -1;
   dri2_dpy->driver_name = strdup("swrast");
   if (!dri2_load_driver_swrast(disp))
      goto cleanup_conn;
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -152,12 +152,51 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,

         /* The EGL_KHR_create_context spec says:
          *
-          *     "Flags are only defined for OpenGL context creation, and
-          *     specifying a flags value other than zero for other types of
-          *     contexts, including OpenGL ES contexts, will generate an
-          *     error."
+          *     "If the EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR flag bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a <debug context> will be created.
+          *     [...]
+          *     In some cases a debug context may be identical to a non-debug
+          *     context. This bit is supported for OpenGL and OpenGL ES
+          *     contexts."
          */
-         if (api != EGL_OPENGL_API && val != 0) {
+         if ((val & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR) &&
+             (api != EGL_OPENGL_API && api != EGL_OPENGL_ES_API)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR flag bit
+          *     is set in EGL_CONTEXT_FLAGS_KHR, then a <forward-compatible>
+          *     context will be created. Forward-compatible contexts are
+          *     defined only for OpenGL versions 3.0 and later. They must not
+          *     support functionality marked as <deprecated> by that version of
+          *     the API, while a non-forward-compatible context must support
+          *     all functionality in that version, deprecated or not. This bit
+          *     is supported for OpenGL contexts, and requesting a
+          *     forward-compatible context for OpenGL versions less than 3.0
+          *     will generate an error."
+          */
+         if ((val & EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR) &&
+             (api != EGL_OPENGL_API || ctx->ClientMajorVersion < 3)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context_spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
+          *     access> will be created. Robust buffer access is defined in the
+          *     GL_ARB_robustness extension specification, and the resulting
+          *     context must also support either the GL_ARB_robustness
+          *     extension, or a version of OpenGL incorporating equivalent
+          *     functionality. This bit is supported for OpenGL contexts.
+          */
+         if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
+             (api != EGL_OPENGL_API ||
+              !dpy->Extensions.EXT_create_context_robustness)) {
            err = EGL_BAD_ATTRIBUTE;
            break;
         }
--- a/src/egl/wayland/wayland-drm/wayland-drm.c
+++ b/src/egl/wayland/wayland-drm/wayland-drm.c
@@ -197,7 +197,7 @@ drm_authenticate(struct wl_client *client,
 		wl_resource_post_event(resource, WL_DRM_AUTHENTICATED);
 }

-const static struct wl_drm_interface drm_interface = {
+static const struct wl_drm_interface drm_interface = {
 	drm_authenticate,
 	drm_create_buffer,
        drm_create_planar_buffer,
--- a/src/egl/wayland/wayland-egl/wayland-egl.c
+++ b/src/egl/wayland/wayland-egl/wayland-egl.c
@@ -1,3 +1,32 @@
+/*
+ * Copyright © 2011 Kristian Høgsberg
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kristian Høgsberg <krh@bitplanet.net>
+ *    Benjamin Franzke <benjaminfranzke@googlemail.com>
+ */
+
 #include <stdlib.h>

 #include <wayland-client.h>
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -11,6 +11,7 @@ SUBDIRS += auxiliary
 ##

 SUBDIRS += \
+	drivers/ddebug \
 	drivers/noop \
 	drivers/trace \
 	drivers/rbug
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \

 endif

-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+indices/u_indices_gen.c: indices/u_indices_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@

-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(AM_V_at)$(MKDIR_P) util
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@

+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@

 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -129,6 +129,8 @@ C_SOURCES := \
 	rtasm/rtasm_execmem.h \
 	rtasm/rtasm_x86sse.c \
 	rtasm/rtasm_x86sse.h \
+	tgsi/tgsi_aa_point.c \
+	tgsi/tgsi_aa_point.h \
 	tgsi/tgsi_build.c \
 	tgsi/tgsi_build.h \
 	tgsi/tgsi_dump.c \
@@ -144,6 +146,8 @@ C_SOURCES := \
 	tgsi/tgsi_opcode_tmp.h \
 	tgsi/tgsi_parse.c \
 	tgsi/tgsi_parse.h \
+	tgsi/tgsi_point_sprite.c \
+	tgsi/tgsi_point_sprite.h \
 	tgsi/tgsi_sanity.c \
 	tgsi/tgsi_sanity.h \
 	tgsi/tgsi_scan.c \
@@ -154,6 +158,8 @@ C_SOURCES := \
 	tgsi/tgsi_text.h \
 	tgsi/tgsi_transform.c \
 	tgsi/tgsi_transform.h \
+	tgsi/tgsi_two_side.c \
+	tgsi/tgsi_two_side.h \
 	tgsi/tgsi_ureg.c \
 	tgsi/tgsi_ureg.h \
 	tgsi/tgsi_util.c \
@@ -260,6 +266,8 @@ C_SOURCES := \
 	util/u_pack_color.h \
 	util/u_pointer.h \
 	util/u_prim.h \
+	util/u_prim_restart.c \
+	util/u_prim_restart.h \
 	util/u_pstipple.c \
 	util/u_pstipple.h \
 	util/u_range.h \
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -240,7 +240,8 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);

   /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
-   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_Y, TRUE);

   /* compute coverage factor = (1-d)/(1-k) */

--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -280,7 +280,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)

   /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
   tgsi_transform_kill_inst(ctx,
-                            TGSI_FILE_TEMPORARY, pctx->texTemp, TGSI_SWIZZLE_W);
+                            TGSI_FILE_TEMPORARY, pctx->texTemp,
+                            TGSI_SWIZZLE_W, TRUE);
 }


--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
   else {
      double dscale = lp_const_scale(type);

-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
   }

   return elem;
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -81,6 +81,8 @@
 #  pragma pop_macro("DEBUG")
 #endif

+#include "c11/threads.h"
+#include "os/os_thread.h"
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
@@ -103,6 +105,33 @@ static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;

 }

+static once_flag init_native_targets_once_flag;
+
+static void init_native_targets()
+{
+   // If we have a native target, initialize it to ensure it is linked in and
+   // usable by the JIT.
+   llvm::InitializeNativeTarget();
+
+   llvm::InitializeNativeTargetAsmPrinter();
+
+   llvm::InitializeNativeTargetDisassembler();
+}
+
+/**
+ * The llvm target registry is not thread-safe, so drivers and state-trackers
+ * that want to initialize targets should use the gallivm_init_llvm_targets()
+ * function to safely initialize targets.
+ *
+ * LLVM targets should be initialized before the driver or state-tracker tries
+ * to access the registry.
+ */
+extern "C" void
+gallivm_init_llvm_targets(void)
+{
+   call_once(&init_native_targets_once_flag, init_native_targets);
+}
+
 extern "C" void
 lp_set_target_options(void)
 {
@@ -115,13 +144,7 @@ lp_set_target_options(void)
   llvm::DisablePrettyStackTrace = true;
 #endif

-   // If we have a native target, initialize it to ensure it is linked in and
-   // usable by the JIT.
-   llvm::InitializeNativeTarget();
-
-   llvm::InitializeNativeTargetAsmPrinter();
-
-   llvm::InitializeNativeTargetDisassembler();
+   gallivm_init_llvm_targets();
 }


--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -41,6 +41,8 @@ extern "C" {

 struct lp_generated_code;

+extern void
+gallivm_init_llvm_targets(void);

 extern void
 lp_set_target_options(void);
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -24,6 +24,7 @@

 #include "util/ralloc.h"
 #include "glsl/nir/nir.h"
+#include "glsl/nir/nir_control_flow.h"
 #include "glsl/nir/nir_builder.h"
 #include "glsl/list.h"
 #include "glsl/shader_enums.h"
@@ -64,24 +65,24 @@ struct ttn_compile {
   nir_register *addr_reg;

   /**
-    * Stack of cf_node_lists where instructions should be pushed as we pop
+    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
    * instructions should be placed, and if_stack[if_stack_pos - 1] has where
    * the next instructions outside of the if/then/else block go.
    */
-   struct exec_list **if_stack;
+   nir_cursor *if_stack;
   unsigned if_stack_pos;

   /**
-    * Stack of cf_node_lists where instructions should be pushed as we pop
+    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
    * of the loop.
    */
-   struct exec_list **loop_stack;
+   nir_cursor *loop_stack;
   unsigned loop_stack_pos;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
@@ -93,6 +94,128 @@ struct ttn_compile {
 #define ttn_channel(b, src, swiz) \
   nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)

+static gl_varying_slot
+tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
+{
+   switch (semantic) {
+   case TGSI_SEMANTIC_POSITION:
+      return VARYING_SLOT_POS;
+   case TGSI_SEMANTIC_COLOR:
+      if (index == 0)
+         return VARYING_SLOT_COL0;
+      else
+         return VARYING_SLOT_COL1;
+   case TGSI_SEMANTIC_BCOLOR:
+      if (index == 0)
+         return VARYING_SLOT_BFC0;
+      else
+         return VARYING_SLOT_BFC1;
+   case TGSI_SEMANTIC_FOG:
+      return VARYING_SLOT_FOGC;
+   case TGSI_SEMANTIC_PSIZE:
+      return VARYING_SLOT_PSIZ;
+   case TGSI_SEMANTIC_GENERIC:
+      return VARYING_SLOT_VAR0 + index;
+   case TGSI_SEMANTIC_FACE:
+      return VARYING_SLOT_FACE;
+   case TGSI_SEMANTIC_EDGEFLAG:
+      return VARYING_SLOT_EDGE;
+   case TGSI_SEMANTIC_PRIMID:
+      return VARYING_SLOT_PRIMITIVE_ID;
+   case TGSI_SEMANTIC_CLIPDIST:
+      if (index == 0)
+         return VARYING_SLOT_CLIP_DIST0;
+      else
+         return VARYING_SLOT_CLIP_DIST1;
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      return VARYING_SLOT_CLIP_VERTEX;
+   case TGSI_SEMANTIC_TEXCOORD:
+      return VARYING_SLOT_TEX0 + index;
+   case TGSI_SEMANTIC_PCOORD:
+      return VARYING_SLOT_PNTC;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return VARYING_SLOT_VIEWPORT;
+   case TGSI_SEMANTIC_LAYER:
+      return VARYING_SLOT_LAYER;
+   default:
+      fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
+      abort();
+   }
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * VARYING_SLOT_
+ */
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [VARYING_SLOT_POS] = { TGSI_SEMANTIC_POSITION, 0 },
+      [VARYING_SLOT_COL0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [VARYING_SLOT_COL1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [VARYING_SLOT_BFC0] = { TGSI_SEMANTIC_BCOLOR, 0 },
+      [VARYING_SLOT_BFC1] = { TGSI_SEMANTIC_BCOLOR, 1 },
+      [VARYING_SLOT_FOGC] = { TGSI_SEMANTIC_FOG, 0 },
+      [VARYING_SLOT_PSIZ] = { TGSI_SEMANTIC_PSIZE, 0 },
+      [VARYING_SLOT_FACE] = { TGSI_SEMANTIC_FACE, 0 },
+      [VARYING_SLOT_EDGE] = { TGSI_SEMANTIC_EDGEFLAG, 0 },
+      [VARYING_SLOT_PRIMITIVE_ID] = { TGSI_SEMANTIC_PRIMID, 0 },
+      [VARYING_SLOT_CLIP_DIST0] = { TGSI_SEMANTIC_CLIPDIST, 0 },
+      [VARYING_SLOT_CLIP_DIST1] = { TGSI_SEMANTIC_CLIPDIST, 1 },
+      [VARYING_SLOT_CLIP_VERTEX] = { TGSI_SEMANTIC_CLIPVERTEX, 0 },
+      [VARYING_SLOT_PNTC] = { TGSI_SEMANTIC_PCOORD, 0 },
+      [VARYING_SLOT_VIEWPORT] = { TGSI_SEMANTIC_VIEWPORT_INDEX, 0 },
+      [VARYING_SLOT_LAYER] = { TGSI_SEMANTIC_LAYER, 0 },
+   };
+
+   if (slot >= VARYING_SLOT_VAR0) {
+      *semantic_name = TGSI_SEMANTIC_GENERIC;
+      *semantic_index = slot - VARYING_SLOT_VAR0;
+      return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+      *semantic_name = TGSI_SEMANTIC_TEXCOORD;
+      *semantic_index = slot - VARYING_SLOT_TEX0;
+      return;
+   }
+
+   if (slot >= ARRAY_SIZE(map)) {
+      fprintf(stderr, "Unknown varying slot %d\n", slot);
+      abort();
+   }
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * FRAG_RESULT_
+ */
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [FRAG_RESULT_DEPTH] = { TGSI_SEMANTIC_POSITION, 0 },
+      [FRAG_RESULT_COLOR] = { TGSI_SEMANTIC_COLOR, -1 },
+      [FRAG_RESULT_DATA0 + 0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [FRAG_RESULT_DATA0 + 1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [FRAG_RESULT_DATA0 + 2] = { TGSI_SEMANTIC_COLOR, 2 },
+      [FRAG_RESULT_DATA0 + 3] = { TGSI_SEMANTIC_COLOR, 3 },
+      [FRAG_RESULT_DATA0 + 4] = { TGSI_SEMANTIC_COLOR, 4 },
+      [FRAG_RESULT_DATA0 + 5] = { TGSI_SEMANTIC_COLOR, 5 },
+      [FRAG_RESULT_DATA0 + 6] = { TGSI_SEMANTIC_COLOR, 6 },
+      [FRAG_RESULT_DATA0 + 7] = { TGSI_SEMANTIC_COLOR, 7 },
+   };
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
 static nir_ssa_def *
 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
 {
@@ -215,12 +338,15 @@ ttn_emit_declaration(struct ttn_compile *c)
            var->data.mode = nir_var_shader_in;
            var->name = ralloc_asprintf(var, "in_%d", idx);

-            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-             * instead, but nothing in NIR core is looking at the value
-             * currently, and this is less change to drivers.
-             */
-            var->data.location = decl->Semantic.Name;
-            var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(decl->Semantic.Name,
+                                                decl->Semantic.Index);
+            } else {
+               assert(!decl->Declaration.Semantic);
+               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
+            }
+            var->data.index = 0;

            /* We definitely need to translate the interpolation field, because
             * nir_print will decode it.
@@ -240,6 +366,8 @@ ttn_emit_declaration(struct ttn_compile *c)
            exec_list_push_tail(&b->shader->inputs, &var->node);
            break;
         case TGSI_FILE_OUTPUT: {
+            int semantic_name = decl->Semantic.Name;
+            int semantic_index = decl->Semantic.Index;
            /* Since we can't load from outputs in the IR, we make temporaries
             * for the outputs and emit stores to the real outputs at the end of
             * the shader.
@@ -251,14 +379,40 @@ ttn_emit_declaration(struct ttn_compile *c)

            var->data.mode = nir_var_shader_out;
            var->name = ralloc_asprintf(var, "out_%d", idx);
+            var->data.index = 0;

-            var->data.location = decl->Semantic.Name;
-            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
-                decl->Semantic.Index == 0 &&
-                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-               var->data.index = -1;
-            else
-               var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               switch (semantic_name) {
+               case TGSI_SEMANTIC_COLOR: {
+                  /* TODO tgsi loses some information, so we cannot
+                   * actually differentiate here between DSB and MRT
+                   * at this point.  But so far no drivers using tgsi-
+                   * to-nir support dual source blend:
+                   */
+                  bool dual_src_blend = false;
+                  if (dual_src_blend && (semantic_index == 1)) {
+                     var->data.location = FRAG_RESULT_DATA0;
+                     var->data.index = 1;
+                  } else {
+                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+                        var->data.location = FRAG_RESULT_COLOR;
+                     else
+                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
+                  }
+                  break;
+               }
+               case TGSI_SEMANTIC_POSITION:
+                  var->data.location = FRAG_RESULT_DEPTH;
+                  break;
+               default:
+                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
+                          decl->Semantic.Name, decl->Semantic.Index);
+                  abort();
+               }
+            } else {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
+            }

            if (is_array) {
               unsigned j;
@@ -307,7 +461,7 @@ ttn_emit_immediate(struct ttn_compile *c)
   for (i = 0; i < 4; i++)
      load_const->value.u[i] = tgsi_imm->u[i].Uint;

-   nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr);
+   nir_builder_instr_insert(b, &load_const->instr);
 }

 static nir_src
@@ -363,7 +517,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
         load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);

         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+         nir_builder_instr_insert(b, &load->instr);

         src = nir_src_for_ssa(&load->dest.ssa);

@@ -414,7 +568,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
      load->num_components = ncomp;

      nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
-      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
@@ -476,7 +630,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
         srcn++;
      }
      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
@@ -552,7 +706,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)

         load->dest = nir_dest_for_reg(reg);

-         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+         nir_builder_instr_insert(b, &load->instr);
      } else {
         assert(!tgsi_dst->Indirect);
         dest.dest.reg.reg = c->temp_regs[index].reg;
@@ -667,7 +821,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
@@ -683,7 +837,7 @@ ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
   mov->src[0].src = nir_src_for_ssa(def);
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
-   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
+   nir_builder_instr_insert(b, &mov->instr);
 }

 static void
@@ -902,7 +1056,7 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+   nir_builder_instr_insert(b, &discard->instr);
 }

 static void
@@ -912,7 +1066,7 @@ ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+   nir_builder_instr_insert(b, &discard->instr);
 }

 static void
@@ -920,10 +1074,6 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
 {
   nir_builder *b = &c->build;

-   /* Save the outside-of-the-if-statement node list. */
-   c->if_stack[c->if_stack_pos] = b->cf_node_list;
-   c->if_stack_pos++;
-
   src = ttn_channel(b, src, X);

   nir_if *if_stmt = nir_if_create(b->shader);
@@ -932,11 +1082,14 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
   } else {
      if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0)));
   }
-   nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node);
+   nir_builder_cf_insert(b, &if_stmt->cf_node);

-   nir_builder_insert_after_cf_list(b, &if_stmt->then_list);
+   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
+   c->if_stack_pos++;

-   c->if_stack[c->if_stack_pos] = &if_stmt->else_list;
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
   c->if_stack_pos++;
 }

@@ -945,7 +1098,7 @@ ttn_else(struct ttn_compile *c)
 {
   nir_builder *b = &c->build;

-   nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]);
+   b->cursor = c->if_stack[c->if_stack_pos - 1];
 }

 static void
@@ -954,7 +1107,7 @@ ttn_endif(struct ttn_compile *c)
   nir_builder *b = &c->build;

   c->if_stack_pos -= 2;
-   nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]);
+   b->cursor = c->if_stack[c->if_stack_pos];
 }

 static void
@@ -962,28 +1115,27 @@ ttn_bgnloop(struct ttn_compile *c)
 {
   nir_builder *b = &c->build;

-   /* Save the outside-of-the-loop node list. */
-   c->loop_stack[c->loop_stack_pos] = b->cf_node_list;
+   nir_loop *loop = nir_loop_create(b->shader);
+   nir_builder_cf_insert(b, &loop->cf_node);
+
+   c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
   c->loop_stack_pos++;

-   nir_loop *loop = nir_loop_create(b->shader);
-   nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node);
-
-   nir_builder_insert_after_cf_list(b, &loop->body);
+   b->cursor = nir_after_cf_list(&loop->body);
 }

 static void
 ttn_cont(nir_builder *b)
 {
   nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
 ttn_brk(nir_builder *b)
 {
   nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
@@ -992,7 +1144,7 @@ ttn_endloop(struct ttn_compile *c)
   nir_builder *b = &c->build;

   c->loop_stack_pos--;
-   nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]);
+   b->cursor = c->loop_stack[c->loop_stack_pos];
 }

 static void
@@ -1279,7 +1431,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
@@ -1318,10 +1470,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
   txs->src[0].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr);
+   nir_builder_instr_insert(b, &txs->instr);

   nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr);
+   nir_builder_instr_insert(b, &qlv->instr);

   ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
   ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
@@ -1730,7 +1882,7 @@ ttn_emit_instruction(struct ttn_compile *c)
      store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
      store->src[0] = nir_src_for_reg(dest.dest.reg.reg);

-      nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+      nir_builder_instr_insert(b, &store->instr);
   }
 }

@@ -1759,11 +1911,26 @@ ttn_add_output_stores(struct ttn_compile *c)
         store->const_index[0] = loc;
         store->src[0].reg.reg = c->output_regs[loc].reg;
         store->src[0].reg.base_offset = c->output_regs[loc].offset;
-         nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+         nir_builder_instr_insert(b, &store->instr);
      }
   }
 }

+static gl_shader_stage
+tgsi_processor_to_shader_stage(unsigned processor)
+{
+   switch (processor) {
+   case TGSI_PROCESSOR_FRAGMENT:  return MESA_SHADER_FRAGMENT;
+   case TGSI_PROCESSOR_VERTEX:    return MESA_SHADER_VERTEX;
+   case TGSI_PROCESSOR_GEOMETRY:  return MESA_SHADER_GEOMETRY;
+   case TGSI_PROCESSOR_TESS_CTRL: return MESA_SHADER_TESS_CTRL;
+   case TGSI_PROCESSOR_TESS_EVAL: return MESA_SHADER_TESS_EVAL;
+   case TGSI_PROCESSOR_COMPUTE:   return MESA_SHADER_COMPUTE;
+   default:
+      unreachable("invalid TGSI processor");
+   };
+}
+
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
            const nir_shader_compiler_options *options)
@@ -1775,17 +1942,19 @@ tgsi_to_nir(const void *tgsi_tokens,
   int ret;

   c = rzalloc(NULL, struct ttn_compile);
-   s = nir_shader_create(NULL, options);
+
+   tgsi_scan_shader(tgsi_tokens, &scan);
+   c->scan = &scan;
+
+   s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
+                         options);

   nir_function *func = nir_function_create(s, "main");
   nir_function_overload *overload = nir_function_overload_create(func);
   nir_function_impl *impl = nir_function_impl_create(overload);

   nir_builder_init(&c->build, impl);
-   nir_builder_insert_after_cf_list(&c->build, &impl->body);
-
-   tgsi_scan_shader(tgsi_tokens, &scan);
-   c->scan = &scan;
+   c->build.cursor = nir_after_cf_list(&impl->body);

   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
@@ -1801,10 +1970,10 @@ tgsi_to_nir(const void *tgsi_tokens,
   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

-   c->if_stack = rzalloc_array(c, struct exec_list *,
+   c->if_stack = rzalloc_array(c, nir_cursor,
                               (scan.opcode_count[TGSI_OPCODE_IF] +
                                scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
-   c->loop_stack = rzalloc_array(c, struct exec_list *,
+   c->loop_stack = rzalloc_array(c, nir_cursor,
                                 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);

   ret = tgsi_parse_init(&parser, tgsi_tokens);
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.h
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h
@@ -28,3 +28,9 @@ struct nir_shader_compiler_options *options;
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
            const struct nir_shader_compiler_options *options);
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index);
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index);
--- a/src/gallium/auxiliary/os/os_misc.c
+++ b/src/gallium/auxiliary/os/os_misc.c
@@ -96,11 +96,13 @@ os_log_message(const char *message)
 }


+#if !defined(PIPE_SUBSYSTEM_EMBEDDED)
 const char *
 os_get_option(const char *name)
 {
   return getenv(name);
 }
+#endif /* !PIPE_SUBSYSTEM_EMBEDDED */


 /**
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
                        unsigned bypass_usage,
                        uint64_t maximum_cache_size);

+/**
+ * Remove a buffer from the cache, but keep it alive.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);

 struct pb_fence_ops;

--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
 }


+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;
+   }
+   buf->mgr = NULL;
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (!mgr)
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
 * Actually destroy the buffer.
 */
 static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
   assert(!pipe_is_referenced(&buf->base.reference));
   pb_reference(&buf->buffer, NULL);
   FREE(buf);
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
   struct pb_cache_manager *mgr = buf->mgr;

+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
   pipe_mutex_lock(mgr->mutex);
   assert(!pipe_is_referenced(&buf->base.reference));
   
--- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -11,6 +11,10 @@
 * one or more debug driver: rbug, trace.
 */

+#ifdef GALLIUM_DDEBUG
+#include "ddebug/dd_public.h"
+#endif
+
 #ifdef GALLIUM_TRACE
 #include "trace/tr_public.h"
 #endif
@@ -30,6 +34,10 @@
 static inline struct pipe_screen *
 debug_screen_wrap(struct pipe_screen *screen)
 {
+#if defined(GALLIUM_DDEBUG)
+   screen = ddebug_screen_create(screen);
+#endif
+
 #if defined(GALLIUM_RBUG)
   screen = rbug_screen_create(screen);
 #endif
--- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the fragment shader to support anti-aliasing points.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_aa_point.h"
+#include "tgsi_transform.h"
+
+#define INVALID_INDEX 9999
+
+struct aa_transform_context
+{
+   struct tgsi_transform_context base;
+
+   unsigned tmp;           // temp register
+   unsigned color_out;     // frag color out register
+   unsigned color_tmp;     // frag color temp register
+   unsigned num_tmp;       // number of temp registers
+   unsigned num_imm;       // number of immediates
+   unsigned num_input;     // number of inputs
+   unsigned aa_point_coord_index;
+};
+
+static inline struct aa_transform_context *
+aa_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct aa_transform_context *) ctx;
+}
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+aa_decl(struct tgsi_transform_context *ctx,
+              struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
+         ts->color_out = decl->Range.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      ts->num_input++;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+aa_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+aa_prolog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned tmp0;
+   unsigned texIn;
+   unsigned imm;
+
+   /* Declare two temporary registers, one for temporary and
+    * one for color.
+    */
+   ts->tmp = ts->num_tmp++;
+   ts->color_tmp = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->tmp, ts->color_tmp);
+
+   /* Declare new generic input/texcoord */
+   texIn = ts->num_input++;
+   tgsi_transform_input_decl(ctx, texIn, TGSI_SEMANTIC_GENERIC,
+                             ts->aa_point_coord_index, TGSI_INTERPOLATE_LINEAR);
+
+   /* Declare extra immediates */
+   imm = ts->num_imm++;
+   tgsi_transform_immediate_decl(ctx, 0.5, 0.5, 0.45, 1.0);
+
+   /*
+    * Emit code to compute fragment coverage.
+    * The point always has radius 0.5.  The threshold value will be a
+    * value less than, but close to 0.5, such as 0.45.
+    * We compute a coverage factor from the distance and threshold.
+    * If the coverage is negative, the fragment is outside the circle and
+    * it's discarded.
+    * If the coverage is >= 1, the fragment is fully inside the threshold
+    * distance.  We limit/clamp the coverage to 1.
+    * Otherwise, the fragment is between the threshold value and 0.5 and we
+    * compute a coverage value in [0,1].
+    *
+    * Input reg (texIn) usage:
+    *  texIn.x = x point coord in [0,1]
+    *  texIn.y = y point coord in [0,1]
+    *  texIn.z = "k" the smoothing threshold distance
+    *  texIn.w = unused
+    *
+    * Temp reg (t0) usage:
+    *  t0.x = distance of fragment from center point
+    *  t0.y = boolean, is t0.x > 0.5, also misc temp usage
+    *  t0.z = temporary for computing 1/(0.5-k) value
+    *  t0.w = final coverage value
+    */
+
+   tmp0 = ts->tmp;
+
+   /* SUB t0.xy, texIn, (0.5, 0,5) */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
+                           TGSI_FILE_INPUT, texIn,
+                           TGSI_FILE_IMMEDIATE, imm);
+
+   /* DP2 t0.x, t0.xy, t0.xy;  # t0.x = x^2 + y^2 */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* SQRT t0.x, t0.x */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* compute coverage factor = (0.5-d)/(0.5-k) */
+
+   /* SUB t0.w, 0.5, texIn.z;  # t0.w = 0.5-k */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z);
+
+   /* SUB t0.y, 0.5, t0.x;  # t0.y = 0.5-d */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
+
+   /* DIV t0.w, t0.y, t0.w;  # coverage = (0.5-d)/(0.5-k) */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
+
+   /* If the coverage value is negative, it means the fragment is outside
+    * the point's circular boundary.  Kill it.
+    */
+   /* KILL_IF tmp0.w;  # if tmp0.w < 0 KILL */
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_W, FALSE);
+
+   /* If the distance is less than the threshold, the coverage/alpha value
+    * will be greater than one.  Clamp to one here.
+    */
+   /* MIN tmp0.w, tmp0.w, 1.0 */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W);
+}
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+aa_inst(struct tgsi_transform_context *ctx,
+        struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned i;
+
+   /* Look for writes to color output reg and replace it with
+    * color temp reg.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      if (dst->Register.File == TGSI_FILE_OUTPUT &&
+          dst->Register.Index == ts->color_out) {
+         dst->Register.File = TGSI_FILE_TEMPORARY;
+         dst->Register.Index = ts->color_tmp;
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+/**
+ * TGSI transform epilog callback.
+ */
+static void
+aa_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   /* add alpha modulation code at tail of program */
+   assert(ts->color_out != INVALID_INDEX);
+   assert(ts->color_tmp != INVALID_INDEX);
+
+   /* MOV output.color.xyz colorTmp */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_XYZ,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp);
+
+   /* MUL output.color.w colorTmp.w tmp0.w */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_W,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp,
+                           TGSI_FILE_TEMPORARY, ts->tmp);
+}
+
+/**
+ * TGSI utility to transform a fragment shader to support antialiasing point.
+ *
+ * This utility accepts two inputs:
+ *\param tokens_in  -- the original token string of the shader
+ *\param aa_point_coord_index -- the semantic index of the generic register
+ *                            that contains the point sprite texture coord
+ *
+ * For each fragment in the point, we compute the distance of the fragment
+ * from the point center using the point sprite texture coordinates.
+ * If the distance is greater than 0.5, we'll discard the fragment.
+ * Otherwise, we'll compute a coverage value which approximates how much
+ * of the fragment is inside the bounding circle of the point. If the distance
+ * is less than 'k', the coverage is 1. Else, the coverage is between 0 and 1.
+ * The final fragment color's alpha channel is then modulated by the coverage
+ * value.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index)
+{
+   struct aa_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = aa_decl;
+   transform.base.transform_instruction = aa_inst;
+   transform.base.transform_immediate = aa_immediate;
+   transform.base.prolog = aa_prolog;
+   transform.base.epilog = aa_epilog;
+
+   transform.tmp = INVALID_INDEX;
+   transform.color_out = INVALID_INDEX;
+   transform.color_tmp = INVALID_INDEX;
+
+   assert(aa_point_coord_index != -1);
+   transform.aa_point_coord_index = (unsigned)aa_point_coord_index;
+
+   transform.num_tmp = 0;
+   transform.num_imm = 0;
+   transform.num_input = 0;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_AA_POINT_H
+#define TGSI_AA_POINT_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index);
+
+#endif /* TGSI_AA_POINT_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
 * execute a texture instruction.
 *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
 * instruction variants like proj, lod, and texture with lod bias.
 * sampler indicates which src register the sampler is contained in.
 */
@@ -2032,7 +2032,7 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
   const union tgsi_exec_channel *args[5], *proj = NULL;
   union tgsi_exec_channel r[5];
-   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
   uint chan;
   uint unit;
   int8_t offsets[3];
@@ -2078,11 +2078,11 @@ exec_tex(struct tgsi_exec_machine *mach,
         args[i] = &ZeroVec;

      if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
      else if (modifier == TEX_MODIFIER_LOD_BIAS)
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
      else if (modifier == TEX_MODIFIER_GATHER)
-         control = tgsi_sampler_gather;
+         control = TGSI_SAMPLER_GATHER;
   }
   else {
      for (i = dim; i < Elements(args); i++)
@@ -2132,6 +2132,46 @@ exec_tex(struct tgsi_exec_machine *mach,
   }
 }

+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   uint unit;
+   int dim;
+   int i;
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];
+
+   unit = fetch_sampler_unit(mach, inst, 1);
+   dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   assert(dim <= Elements(coords));
+   /* fetch coordinates */
+   for (i = 0; i < dim; i++) {
+      FETCH(&coords[i], 0, TGSI_CHAN_X + i);
+      args[i] = &coords[i];
+   }
+   for (i = dim; i < Elements(coords); i++) {
+      args[i] = &ZeroVec;
+   }
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+                            args[0]->f,
+                            args[1]->f,
+                            args[2]->f,
+                            args[3]->f,
+                            TGSI_SAMPLER_LOD_NONE,
+                            r[0].f,
+                            r[1].f);
+
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}

 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -2155,7 +2195,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2171,7 +2211,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2185,7 +2225,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2205,7 +2245,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2225,7 +2265,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2364,7 +2404,7 @@ exec_sample(struct tgsi_exec_machine *mach,
   const uint sampler_unit = inst->Src[2].Register.Index;
   union tgsi_exec_channel r[5], c1;
   const union tgsi_exec_channel *lod = &ZeroVec;
-   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
   uint chan;
   unsigned char swizzles[4];
   int8_t offsets[3];
@@ -2378,16 +2418,16 @@ exec_sample(struct tgsi_exec_machine *mach,
      if (modifier == TEX_MODIFIER_LOD_BIAS) {
         FETCH(&c1, 3, TGSI_CHAN_X);
         lod = &c1;
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
      }
      else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
         FETCH(&c1, 3, TGSI_CHAN_X);
         lod = &c1;
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
      }
      else {
         assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
-         control = tgsi_sampler_lod_zero;
+         control = TGSI_SAMPLER_LOD_ZERO;
      }
   }

@@ -2513,7 +2553,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2529,7 +2569,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2547,7 +2587,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);
      break;

@@ -4378,6 +4418,12 @@ exec_instruction(
      exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
      break;

+   case TGSI_OPCODE_LODQ:
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_lodq(mach, inst);
+      break;
+
   case TGSI_OPCODE_UP2H:
      assert (0);
      break;
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -88,13 +88,14 @@ struct tgsi_interp_coef
   float dady[TGSI_NUM_CHANNELS];
 };

-enum tgsi_sampler_control {
-   tgsi_sampler_lod_none,
-   tgsi_sampler_lod_bias,
-   tgsi_sampler_lod_explicit,
-   tgsi_sampler_lod_zero,
-   tgsi_sampler_derivs_explicit,
-   tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+   TGSI_SAMPLER_LOD_NONE,
+   TGSI_SAMPLER_LOD_BIAS,
+   TGSI_SAMPLER_LOD_EXPLICIT,
+   TGSI_SAMPLER_LOD_ZERO,
+   TGSI_SAMPLER_DERIVS_EXPLICIT,
+   TGSI_SAMPLER_GATHER,
 };

 /**
@@ -138,6 +139,16 @@ struct tgsi_sampler
                     const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
                     const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
+                     const unsigned sview_index,
+                     const unsigned sampler_index,
+                     const float s[TGSI_QUAD_SIZE],
+                     const float t[TGSI_QUAD_SIZE],
+                     const float p[TGSI_QUAD_SIZE],
+                     const float c0[TGSI_QUAD_SIZE],
+                     const enum tgsi_sampler_control control,
+                     float mipmap[TGSI_QUAD_SIZE],
+                     float lod[TGSI_QUAD_SIZE]);
 };

 #define TGSI_EXEC_NUM_TEMPS       4096
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
   { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
   { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
   { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 },     /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
   { 0, 0, 0, 0, 0, 0, NONE, "", 105 },     /* removed */
   { 0, 0, 0, 0, 0, 0, NONE, "", 106 },     /* removed */
   { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
   case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMAD:
--- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the geometry shader to emulate point sprite by
+ * drawing a quad. It also adds an extra output for the original point position
+ * if the point position is to be written to a stream output buffer.
+ * Note: It assumes the driver will add a constant for the inverse viewport
+ *       after the user defined constants.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_point_sprite.h"
+#include "tgsi_transform.h"
+#include "pipe/p_state.h"
+
+#define INVALID_INDEX 9999
+
+/* Set swizzle based on the immediates (0, 1, 0, -1) */
+static inline unsigned
+set_swizzle(int x, int y, int z, int w)
+{
+   static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
+                                   TGSI_SWIZZLE_Y};
+   assert(x >= -1);
+   assert(x <= 1);
+   assert(y >= -1);
+   assert(y <= 1);
+   assert(z >= -1);
+   assert(z <= 1);
+   assert(w >= -1);
+   assert(w <= 1);
+
+   return map[x+1] | (map[y+1] << 2) | (map[z+1] << 4) | (map[w+1] << 6);
+}
+
+static inline unsigned
+get_swizzle(unsigned swizzle, unsigned component)
+{
+   assert(component < 4);
+   return (swizzle >> (component * 2)) & 0x3;
+}
+
+struct psprite_transform_context
+{
+   struct tgsi_transform_context base;
+   unsigned num_tmp;
+   unsigned num_out;
+   unsigned num_orig_out;
+   unsigned num_const;
+   unsigned num_imm;
+   unsigned point_size_in;          // point size input
+   unsigned point_size_out;         // point size output
+   unsigned point_size_tmp;         // point size temp
+   unsigned point_pos_in;           // point pos input
+   unsigned point_pos_out;          // point pos output
+   unsigned point_pos_sout;         // original point pos for streamout
+   unsigned point_pos_tmp;          // point pos temp
+   unsigned point_scale_tmp;        // point scale temp
+   unsigned point_color_out;        // point color output
+   unsigned point_color_tmp;        // point color temp
+   unsigned point_imm;              // point immediates
+   unsigned point_ivp;              // point inverseViewport constant
+   unsigned point_dir_swz[4];       // point direction swizzle
+   unsigned point_coord_swz[4];     // point coord swizzle
+   unsigned point_coord_enable;     // point coord enable mask
+   unsigned point_coord_decl;       // point coord output declared mask
+   unsigned point_coord_out;        // point coord output starting index
+   unsigned point_coord_aa;         // aa point coord semantic index
+   unsigned point_coord_k;          // aa point coord threshold distance
+   unsigned stream_out_point_pos:1; // set if to stream out original point pos
+   unsigned aa_point:1;             // set if doing aa point
+   unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS];
+   int max_generic;
+};
+
+static inline struct psprite_transform_context *
+psprite_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct psprite_transform_context *) ctx;
+}
+
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+psprite_decl(struct tgsi_transform_context *ctx,
+             struct tgsi_full_declaration *decl)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_in = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_in = decl->Range.First;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+               decl->Semantic.Index < 32) {
+         ts->point_coord_decl |= 1 << decl->Semantic.Index;
+         ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+      }
+      ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
+      ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+psprite_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+psprite_prolog(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   unsigned point_coord_enable, en;
+   int i;
+
+   /* Replace output registers with temporary registers */
+   for (i = 0; i < ts->num_out; i++) {
+      ts->out_tmp_index[i] = ts->num_tmp++;
+   }
+   ts->num_orig_out = ts->num_out;
+
+   /* Declare a tmp register for point scale */
+   ts->point_scale_tmp = ts->num_tmp++;
+
+   if (ts->point_size_out != INVALID_INDEX)
+      ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
+   else
+      ts->point_size_tmp = ts->num_tmp++;
+
+   assert(ts->point_pos_out != INVALID_INDEX);
+   ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
+   ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
+
+   /* Declare one more tmp register for point coord threshold distance
+    * if we are generating anti-aliased point.
+    */
+   if (ts->aa_point)
+      ts->point_coord_k = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1);
+
+   /* Declare an extra output for the original point position for stream out */
+   if (ts->stream_out_point_pos) {
+      ts->point_pos_sout = ts->num_out++;
+      tgsi_transform_output_decl(ctx, ts->point_pos_sout,
+                                 TGSI_SEMANTIC_GENERIC, 0, 0);
+   }
+
+   /* point coord outputs to be declared */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* Declare outputs for those point coord that are enabled but are not
+    * already declared in this shader.
+    */
+   ts->point_coord_out = ts->num_out;
+   if (point_coord_enable) {
+      for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
+         if (en & 0x1) {
+            tgsi_transform_output_decl(ctx, ts->num_out++,
+                                       TGSI_SEMANTIC_GENERIC, i, 0);
+            ts->max_generic = MAX2(ts->max_generic, i);
+         }
+      }
+   }
+
+   /* add an extra generic output for aa point texcoord */
+   if (ts->aa_point) {
+      ts->point_coord_aa = ts->max_generic + 1;
+      assert((ts->point_coord_enable & (1 << ts->point_coord_aa)) == 0);
+      ts->point_coord_enable |= 1 << (ts->point_coord_aa);
+      tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
+                                 ts->point_coord_aa, 0);
+   }
+
+   /* Declare extra immediates */
+   ts->point_imm = ts->num_imm;
+   tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
+
+   /* Declare point constant -
+    * constant.xy -- inverseViewport
+    * constant.z -- current point size
+    * constant.w -- max point size
+    * The driver needs to add this constant to the constant buffer
+    */
+   ts->point_ivp = ts->num_const++;
+   tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
+
+   /* If this geometry shader does not specify point size,
+    * get the current point size from the point constant.
+    */
+   if (ts->point_size_out == INVALID_INDEX) {
+      struct tgsi_full_instruction inst;
+
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 1;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
+                             ts->point_ivp, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      ctx->emit_instruction(ctx, &inst);
+   }
+}
+
+
+/**
+ * Add the point sprite emulation instructions at the emit vertex instruction
+ */
+static void
+psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
+                         struct tgsi_full_instruction *vert_inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_instruction inst;
+   unsigned point_coord_enable, en;
+   unsigned i, j, s;
+
+   /* new point coord outputs */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* OUTPUT[pos_sout] = TEMP[pos] */
+   if (ts->point_pos_sout != INVALID_INDEX) {
+      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                              TGSI_FILE_OUTPUT, ts->point_pos_sout,
+                              TGSI_WRITEMASK_XYZW,
+                              TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
+   }
+
+   /**
+    * Set up the point scale vector
+    * scale = pointSize * pos.w * inverseViewport
+    */
+
+   /* MUL point_scale.x, point_size.x, point_pos.w */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+                  TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
+
+   /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+   inst.Instruction.NumDstRegs = 1;
+   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_WRITEMASK_XY);
+   inst.Instruction.NumSrcRegs = 2;
+   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
+                          ts->point_ivp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+   ctx->emit_instruction(ctx, &inst);
+
+   /**
+    * Set up the point coord threshold distance
+    * k = 0.5 - 1 / pointsize
+    */
+   if (ts->aa_point) {
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Y,
+                                  TGSI_FILE_TEMPORARY, ts->point_size_tmp,
+                                  TGSI_SWIZZLE_X);
+
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Z,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_SWIZZLE_X);
+   }
+
+
+   for (i = 0; i < 4; i++) {
+      unsigned point_dir_swz = ts->point_dir_swz[i];
+      unsigned point_coord_swz = ts->point_coord_swz[i];
+
+      /* All outputs need to be emitted for each vertex */
+      for (j = 0; j < ts->num_orig_out; j++) {
+         if (ts->out_tmp_index[j] != INVALID_INDEX) {
+            tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                                    TGSI_FILE_OUTPUT, j,
+                                    TGSI_WRITEMASK_XYZW,
+                                    TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
+         }
+      }
+
+      /* pos = point_scale * point_dir + point_pos */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
+                             TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 3;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
+                             TGSI_SWIZZLE_X);
+      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                             get_swizzle(point_dir_swz, 0),
+                             get_swizzle(point_dir_swz, 1),
+                             get_swizzle(point_dir_swz, 2),
+                             get_swizzle(point_dir_swz, 3));
+      tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_W);
+      ctx->emit_instruction(ctx, &inst);
+
+      /* point coord */
+      for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
+         unsigned dstReg;
+
+         if (en & 0x1) {
+            dstReg = ts->point_coord_out + j;
+
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+            inst.Instruction.NumDstRegs = 1;
+            tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
+                                   dstReg, TGSI_WRITEMASK_XYZW);
+            inst.Instruction.NumSrcRegs = 1;
+            tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                   get_swizzle(point_coord_swz, 0),
+                                   get_swizzle(point_coord_swz, 1),
+                                   get_swizzle(point_coord_swz, 2),
+                                   get_swizzle(point_coord_swz, 3));
+            ctx->emit_instruction(ctx, &inst);
+
+            /* MOV point_coord.z  point_coord_k.x */
+            if (s == ts->point_coord_aa) {
+               tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
+                                           TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
+                                           TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                           TGSI_SWIZZLE_X);
+            }
+            j++;  /* the next point coord output offset */
+         }
+      }
+
+      /* Emit the EMIT instruction for each vertex of the quad */
+      ctx->emit_instruction(ctx, vert_inst);
+   }
+
+   /* Emit the ENDPRIM instruction for the quad */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 1;
+   inst.Src[0] = vert_inst->Src[0];
+   ctx->emit_instruction(ctx, &inst);
+}
+
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+psprite_inst(struct tgsi_transform_context *ctx,
+             struct tgsi_full_instruction *inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
+      psprite_emit_vertex_inst(ctx, inst);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+            inst->Dst[0].Register.Index == ts->point_size_out) {
+      /**
+       * Replace point size output reg with tmp reg.
+       * The tmp reg will be later used as a src reg for computing
+       * the point scale factor.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_size_tmp;
+      ctx->emit_instruction(ctx, inst);
+
+      /* Clamp the point size */
+      /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
+
+      /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
+            inst->Dst[0].Register.Index == ts->point_pos_out) {
+      /**
+       * Replace point pos output reg with tmp reg.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_pos_tmp;
+      ctx->emit_instruction(ctx, inst);
+   }
+   else if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+      /**
+       * Replace output reg with tmp reg.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->out_tmp_index[inst->Dst[0].Register.Index];
+      ctx->emit_instruction(ctx, inst);
+   }
+   else {
+      ctx->emit_instruction(ctx, inst);
+   }
+}
+
+
+/**
+ * TGSI property instruction transform callback.
+ * Transforms a point into a 4-vertex triangle strip.
+ */
+static void
+psprite_property(struct tgsi_transform_context *ctx,
+                 struct tgsi_full_property *prop)
+{
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+       prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
+       break;
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+       prop->u[0].Data *= 4;
+       break;
+   default:
+       break;
+   }
+   ctx->emit_property(ctx, prop);
+}
+
+/**
+ * TGSI utility to transform a geometry shader to support point sprite.
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index)
+{
+   struct psprite_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = psprite_decl;
+   transform.base.transform_instruction = psprite_inst;
+   transform.base.transform_property = psprite_property;
+   transform.base.transform_immediate = psprite_immediate;
+   transform.base.prolog = psprite_prolog;
+
+   transform.point_size_in = INVALID_INDEX;
+   transform.point_size_out = INVALID_INDEX;
+   transform.point_size_tmp = INVALID_INDEX;
+   transform.point_pos_in = INVALID_INDEX;
+   transform.point_pos_out = INVALID_INDEX;
+   transform.point_pos_sout = INVALID_INDEX;
+   transform.point_pos_tmp = INVALID_INDEX;
+   transform.point_scale_tmp = INVALID_INDEX;
+   transform.point_imm = INVALID_INDEX;
+   transform.point_coord_aa = INVALID_INDEX;
+   transform.point_coord_k = INVALID_INDEX;
+
+   transform.stream_out_point_pos = stream_out_point_pos;
+   transform.point_coord_enable = point_coord_enable;
+   transform.aa_point = aa_point_coord_index != NULL;
+   transform.max_generic = -1;
+
+   /* point sprite directions based on the immediates (0, 1, 0.5, -1) */
+   /* (-1, -1, 0, 0) */
+   transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0);
+   /* (-1, 1, 0, 0) */
+   transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0);
+   /* (1, -1, 0, 0) */
+   transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0);
+   /* (1, 1, 0, 0) */
+   transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0);
+
+   /* point coord based on the immediates (0, 1, 0, -1) */
+   if (sprite_origin_lower_left) {
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 0, 0, 1);
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 1, 0, 1);
+   }
+   else {
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 1, 0, 1);
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 0, 0, 1);
+   }
+
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   if (aa_point_coord_index)
+      *aa_point_coord_index = transform.point_coord_aa;
+
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_POINT_SPRITE_H
+#define TGSI_POINT_SPRITE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index);
+
+#endif /* TGSI_POINT_SPRITE_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -56,6 +56,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 {
   uint procType, i;
   struct tgsi_parse_context parse;
+   unsigned current_depth = 0;

   memset(info, 0, sizeof(*info));
   for (i = 0; i < TGSI_FILE_COUNT; i++)
@@ -100,6 +101,72 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
            info->opcode_count[fullinst->Instruction.Opcode]++;

+            switch (fullinst->Instruction.Opcode) {
+            case TGSI_OPCODE_IF:
+            case TGSI_OPCODE_UIF:
+            case TGSI_OPCODE_BGNLOOP:
+               current_depth++;
+               info->max_depth = MAX2(info->max_depth, current_depth);
+               break;
+            case TGSI_OPCODE_ENDIF:
+            case TGSI_OPCODE_ENDLOOP:
+               current_depth--;
+               break;
+            default:
+               break;
+            }
+
+            if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+               struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+               unsigned input;
+
+               if (src0->Register.Indirect && src0->Indirect.ArrayID)
+                  input = info->input_array_first[src0->Indirect.ArrayID];
+               else
+                  input = src0->Register.Index;
+
+               /* For the INTERP opcodes, the interpolation is always
+                * PERSPECTIVE unless LINEAR is specified.
+                */
+               switch (info->input_interpolate[input]) {
+               case TGSI_INTERPOLATE_COLOR:
+               case TGSI_INTERPOLATE_CONSTANT:
+               case TGSI_INTERPOLATE_PERSPECTIVE:
+                  switch (fullinst->Instruction.Opcode) {
+                  case TGSI_OPCODE_INTERP_CENTROID:
+                     info->uses_persp_opcode_interp_centroid = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_OFFSET:
+                     info->uses_persp_opcode_interp_offset = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_SAMPLE:
+                     info->uses_persp_opcode_interp_sample = true;
+                     break;
+                  }
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:
+                  switch (fullinst->Instruction.Opcode) {
+                  case TGSI_OPCODE_INTERP_CENTROID:
+                     info->uses_linear_opcode_interp_centroid = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_OFFSET:
+                     info->uses_linear_opcode_interp_offset = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_SAMPLE:
+                     info->uses_linear_opcode_interp_sample = true;
+                     break;
+                  }
+                  break;
+               }
+            }
+
+            if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+                fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+               info->uses_doubles = true;
+
            for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
               const struct tgsi_full_src_register *src =
                  &fullinst->Src[i];
@@ -216,8 +283,48 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
                  info->num_inputs++;

-                  if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-                     info->uses_centroid = TRUE;
+                  /* Only interpolated varyings. Don't include POSITION.
+                   * Don't include integer varyings, because they are not
+                   * interpolated.
+                   */
+                  if (semName == TGSI_SEMANTIC_GENERIC ||
+                      semName == TGSI_SEMANTIC_TEXCOORD ||
+                      semName == TGSI_SEMANTIC_COLOR ||
+                      semName == TGSI_SEMANTIC_BCOLOR ||
+                      semName == TGSI_SEMANTIC_FOG ||
+                      semName == TGSI_SEMANTIC_CLIPDIST ||
+                      semName == TGSI_SEMANTIC_CULLDIST) {
+                     switch (fulldecl->Interp.Interpolate) {
+                     case TGSI_INTERPOLATE_COLOR:
+                     case TGSI_INTERPOLATE_PERSPECTIVE:
+                        switch (fulldecl->Interp.Location) {
+                        case TGSI_INTERPOLATE_LOC_CENTER:
+                           info->uses_persp_center = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_CENTROID:
+                           info->uses_persp_centroid = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_SAMPLE:
+                           info->uses_persp_sample = true;
+                           break;
+                        }
+                        break;
+                     case TGSI_INTERPOLATE_LINEAR:
+                        switch (fulldecl->Interp.Location) {
+                        case TGSI_INTERPOLATE_LOC_CENTER:
+                           info->uses_linear_center = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_CENTROID:
+                           info->uses_linear_centroid = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_SAMPLE:
+                           info->uses_linear_sample = true;
+                           break;
+                        }
+                        break;
+                     /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+                     }
+                  }

                  if (semName == TGSI_SEMANTIC_PRIMID)
                     info->uses_primid = TRUE;
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -82,7 +82,18 @@ struct tgsi_shader_info
   boolean writes_stencil; /**< does fragment shader write stencil value? */
   boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
   boolean uses_kill;  /**< KILL or KILL_IF instruction used? */
-   boolean uses_centroid;
+   boolean uses_persp_center;
+   boolean uses_persp_centroid;
+   boolean uses_persp_sample;
+   boolean uses_linear_center;
+   boolean uses_linear_centroid;
+   boolean uses_linear_sample;
+   boolean uses_persp_opcode_interp_centroid;
+   boolean uses_persp_opcode_interp_offset;
+   boolean uses_persp_opcode_interp_sample;
+   boolean uses_linear_opcode_interp_centroid;
+   boolean uses_linear_opcode_interp_offset;
+   boolean uses_linear_opcode_interp_sample;
   boolean uses_instanceid;
   boolean uses_vertexid;
   boolean uses_vertexid_nobase;
@@ -95,7 +106,7 @@ struct tgsi_shader_info
   boolean writes_viewport_index;
   boolean writes_layer;
   boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
-
+   boolean uses_doubles; /**< uses any of the double instructions */
   unsigned clipdist_writemask;
   unsigned culldist_writemask;
   unsigned num_written_culldistance;
@@ -113,6 +124,11 @@ struct tgsi_shader_info
   unsigned indirect_files_written;

   unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */
+
+   /**
+    * Max nesting limit of loops/if's
+    */
+   unsigned max_depth;
 };

 extern void
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -95,19 +95,38 @@ struct tgsi_transform_context
 * Helper for emitting temporary register declarations.
 */
 static inline void
-tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
-                         unsigned index)
+tgsi_transform_temps_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
 {
   struct tgsi_full_declaration decl;

   decl = tgsi_default_full_declaration();
   decl.Declaration.File = TGSI_FILE_TEMPORARY;
-   decl.Range.First =
-   decl.Range.Last = index;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
   ctx->emit_declaration(ctx, &decl);
 }

+static inline void
+tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
+                         unsigned index)
+{
+   tgsi_transform_temps_decl(ctx, index, index);
+}

+static inline void
+tgsi_transform_const_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_CONSTANT;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
+   ctx->emit_declaration(ctx, &decl);
+}
+ 
 static inline void
 tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
                          unsigned index,
@@ -129,6 +148,26 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
   ctx->emit_declaration(ctx, &decl);
 }

+static inline void
+tgsi_transform_output_decl(struct tgsi_transform_context *ctx,
+                          unsigned index,
+                          unsigned sem_name, unsigned sem_index,
+                          unsigned interp)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_OUTPUT;
+   decl.Declaration.Interpolate = 1;
+   decl.Declaration.Semantic = 1;
+   decl.Semantic.Name = sem_name;
+   decl.Semantic.Index = sem_index;
+   decl.Range.First =
+   decl.Range.Last = index;
+   decl.Interp.Interpolate = interp;
+
+   ctx->emit_declaration(ctx, &decl);
+}

 static inline void
 tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
@@ -182,6 +221,28 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
   ctx->emit_immediate(ctx, &immed);
 }

+static inline void
+tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg,
+                       unsigned file, unsigned index, unsigned writemask)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.WriteMask = writemask;
+}
+
+static inline void
+tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
+                       unsigned file, unsigned index, 
+                       unsigned swizzleX, unsigned swizzleY,
+                       unsigned swizzleZ, unsigned swizzleW)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.SwizzleX = swizzleX; 
+   reg->Register.SwizzleY = swizzleY; 
+   reg->Register.SwizzleZ = swizzleZ; 
+   reg->Register.SwizzleW = swizzleW; 
+}

 /**
 * Helper for emitting 1-operand instructions.
@@ -399,7 +460,8 @@ static inline void
 tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
                         unsigned src_file,
                         unsigned src_index,
-                         unsigned src_swizzle)
+                         unsigned src_swizzle,
+                         boolean negate)
 {
   struct tgsi_full_instruction inst;

@@ -413,7 +475,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
   inst.Src[0].Register.SwizzleY =
   inst.Src[0].Register.SwizzleZ =
   inst.Src[0].Register.SwizzleW = src_swizzle;
-   inst.Src[0].Register.Negate = 1;
+   inst.Src[0].Register.Negate = negate;

   ctx->emit_instruction(ctx, &inst);
 }
--- a/src/gallium/auxiliary/tgsi/tgsi_two_side.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms fragment shaders to facilitate two-sided lighting.
+ *
+ * Basically, if the FS has any color inputs (TGSI_SEMANTIC_COLOR) we'll:
+ * 1. create corresponding back-color inputs (TGSI_SEMANTIC_BCOLOR)
+ * 2. use the FACE register to choose between front/back colors and put the
+ *    selected color in new temp regs.
+ * 3. replace reads of the original color inputs with the new temp regs.
+ *
+ * Then, the driver just needs to link the VS front/back output colors to
+ * the FS front/back input colors.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_two_side.h"
+#include "tgsi_transform.h"
+
+
+#define INVALID_INDEX 9999
+
+
+struct two_side_transform_context
+{
+   struct tgsi_transform_context base;
+   uint num_temps;
+   uint num_inputs;
+   uint face_input;           /**< index of the FACE input */
+   uint front_color_input[2]; /**< INPUT regs */
+   uint front_color_interp[2];/**< TGSI_INTERPOLATE_x */
+   uint back_color_input[2];  /**< INPUT regs */
+   uint new_colors[2];        /**< TEMP regs */
+};
+
+
+static inline struct two_side_transform_context *
+two_side_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct two_side_transform_context *) ctx;
+}
+
+
+static void
+xform_decl(struct tgsi_transform_context *ctx,
+           struct tgsi_full_declaration *decl)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+         /* found a front color */
+         assert(decl->Semantic.Index < 2);
+         ts->front_color_input[decl->Semantic.Index] = decl->Range.First;
+         ts->front_color_interp[decl->Semantic.Index] = decl->Interp.Interpolate;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
+         ts->face_input = decl->Range.First;
+      }
+      ts->num_inputs = MAX2(ts->num_inputs, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_temps = MAX2(ts->num_temps, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+
+static void
+emit_prolog(struct tgsi_transform_context *ctx)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   uint num_colors = 0;
+   uint i;
+
+   /* Declare 0, 1 or 2 new BCOLOR inputs */
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX) {
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Interpolate = 1;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+         decl.Semantic.Index = i;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         decl.Interp.Interpolate = ts->front_color_interp[i];
+         ctx->emit_declaration(ctx, &decl);
+         ts->back_color_input[i] = decl.Range.First;
+         num_colors++;
+      }
+   }
+
+   if (num_colors > 0) {
+      /* Declare 1 or 2 temp registers */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = ts->num_temps;
+      decl.Range.Last = ts->num_temps + num_colors - 1;
+      ctx->emit_declaration(ctx, &decl);
+      ts->new_colors[0] = ts->num_temps;
+      ts->new_colors[1] = ts->num_temps + 1;
+
+      if (ts->face_input == INVALID_INDEX) {
+         /* declare FACE INPUT register */
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+         decl.Semantic.Index = 0;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         ctx->emit_declaration(ctx, &decl);
+         ts->face_input = decl.Range.First;
+      }
+
+      /* CMP temp[c0], face, bcolor[c0], fcolor[c0]
+       * temp[c0] = face < 0.0 ? bcolor[c0] : fcolor[c0]
+       */
+      for (i = 0; i < 2; i++) {
+         if (ts->front_color_input[i] != INVALID_INDEX) {
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+            inst.Instruction.NumDstRegs = 1;
+            inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+            inst.Dst[0].Register.Index = ts->new_colors[i];
+            inst.Instruction.NumSrcRegs = 3;
+            inst.Src[0].Register.File = TGSI_FILE_INPUT;
+            inst.Src[0].Register.Index = ts->face_input;
+            inst.Src[1].Register.File = TGSI_FILE_INPUT;
+            inst.Src[1].Register.Index = ts->back_color_input[i];
+            inst.Src[2].Register.File = TGSI_FILE_INPUT;
+            inst.Src[2].Register.Index = ts->front_color_input[i];
+
+            ctx->emit_instruction(ctx, &inst);
+         }
+      }
+   }
+}
+
+
+static void
+xform_inst(struct tgsi_transform_context *ctx,
+           struct tgsi_full_instruction *inst)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   const struct tgsi_opcode_info *info =
+      tgsi_get_opcode_info(inst->Instruction.Opcode);
+   uint i, j;
+
+   /* Look for src regs which reference the input color and replace
+    * them with the temp color.
+    */
+   for (i = 0; i < info->num_src; i++) {
+      if (inst->Src[i].Register.File == TGSI_FILE_INPUT) {
+         for (j = 0; j < 2; j++) {
+            if (inst->Src[i].Register.Index == ts->front_color_input[j]) {
+               /* replace color input with temp reg */
+               inst->Src[i].Register.File = TGSI_FILE_TEMPORARY;
+               inst->Src[i].Register.Index = ts->new_colors[j];
+               break;
+            }
+         }
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in)
+{
+   struct two_side_transform_context transform;
+   const uint num_new_tokens = 100; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = xform_decl;
+   transform.base.transform_instruction = xform_inst;
+   transform.base.prolog = emit_prolog;
+   transform.face_input = INVALID_INDEX;
+   transform.front_color_input[0] = INVALID_INDEX;
+   transform.front_color_input[1] = INVALID_INDEX;
+   transform.front_color_interp[0] = TGSI_INTERPOLATE_COLOR;
+   transform.front_color_interp[1] = TGSI_INTERPOLATE_COLOR;
+   transform.back_color_input[0] = INVALID_INDEX;
+   transform.back_color_input[1] = INVALID_INDEX;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_two_side.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_TWO_SIDE_H
+#define TGSI_TWO_SIDE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in);
+
+#endif /* TGSI_TWO_SIDE_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -462,3 +462,21 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)

   return dim;
 }
+
+
+boolean
+tgsi_is_shadow_target(unsigned target)
+{
+   switch (target) {
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_SHADOWCUBE:
+   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+      return TRUE;
+   default:
+      return FALSE;
+   }
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -82,6 +82,9 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg);
 int
 tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);

+boolean
+tgsi_is_shadow_target(unsigned target);
+
 #if defined __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,

   u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      return;
   u_upload_unmap(ctx->upload);

   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
@@ -2063,7 +2065,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
   struct pipe_context *pipe = ctx->base.pipe;
   struct pipe_vertex_buffer vb = {0};
-   struct pipe_stream_output_target *so_target;
+   struct pipe_stream_output_target *so_target = NULL;
   unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};

   assert(num_channels >= 1);
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      goto out;
+
   vb.stride = 0;

   blitter_set_running_flag(ctx);
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);

+out:
   blitter_restore_vertex_states(ctx);
   blitter_restore_render_cond(ctx);
   blitter_unset_running_flag(ctx);
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -372,30 +372,28 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
 *
 * States not listed here are not affected by util_blitter. */

-static inline
-void util_blitter_save_blend(struct blitter_context *blitter,
-                             void *state)
+static inline void
+util_blitter_save_blend(struct blitter_context *blitter, void *state)
 {
   blitter->saved_blend_state = state;
 }

-static inline
-void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
-                                           void *state)
+static inline void
+util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
+                                      void *state)
 {
   blitter->saved_dsa_state = state;
 }

-static inline
-void util_blitter_save_vertex_elements(struct blitter_context *blitter,
-                                       void *state)
+static inline void
+util_blitter_save_vertex_elements(struct blitter_context *blitter, void *state)
 {
   blitter->saved_velem_state = state;
 }

-static inline
-void util_blitter_save_stencil_ref(struct blitter_context *blitter,
-                                   const struct pipe_stencil_ref *state)
+static inline void
+util_blitter_save_stencil_ref(struct blitter_context *blitter,
+                              const struct pipe_stencil_ref *state)
 {
   blitter->saved_stencil_ref = *state;
 }
@@ -407,23 +405,20 @@ void util_blitter_save_rasterizer(struct blitter_context *blitter,
   blitter->saved_rs_state = state;
 }

-static inline
-void util_blitter_save_fragment_shader(struct blitter_context *blitter,
-                                       void *fs)
+static inline void
+util_blitter_save_fragment_shader(struct blitter_context *blitter, void *fs)
 {
   blitter->saved_fs = fs;
 }

-static inline
-void util_blitter_save_vertex_shader(struct blitter_context *blitter,
-                                     void *vs)
+static inline void
+util_blitter_save_vertex_shader(struct blitter_context *blitter, void *vs)
 {
   blitter->saved_vs = vs;
 }

-static inline
-void util_blitter_save_geometry_shader(struct blitter_context *blitter,
-                                       void *gs)
+static inline void
+util_blitter_save_geometry_shader(struct blitter_context *blitter, void *gs)
 {
   blitter->saved_gs = gs;
 }
@@ -442,24 +437,24 @@ util_blitter_save_tesseval_shader(struct blitter_context *blitter,
   blitter->saved_tes = sh;
 }

-static inline
-void util_blitter_save_framebuffer(struct blitter_context *blitter,
-                                   const struct pipe_framebuffer_state *state)
+static inline void
+util_blitter_save_framebuffer(struct blitter_context *blitter,
+                              const struct pipe_framebuffer_state *state)
 {
   blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
   util_copy_framebuffer_state(&blitter->saved_fb_state, state);
 }

-static inline
-void util_blitter_save_viewport(struct blitter_context *blitter,
-                                struct pipe_viewport_state *state)
+static inline void
+util_blitter_save_viewport(struct blitter_context *blitter,
+                           struct pipe_viewport_state *state)
 {
   blitter->saved_viewport = *state;
 }

-static inline
-void util_blitter_save_scissor(struct blitter_context *blitter,
-                               struct pipe_scissor_state *state)
+static inline void
+util_blitter_save_scissor(struct blitter_context *blitter,
+                          struct pipe_scissor_state *state)
 {
   blitter->saved_scissor = *state;
 }
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -41,6 +41,7 @@
 #include "util/u_tile.h" 
 #include "util/u_prim.h"
 #include "util/u_surface.h"
+#include <inttypes.h>

 #include <stdio.h>
 #include <limits.h> /* CHAR_BIT */
@@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
      for (; flags->name; ++flags)
         namealign = MAX2(namealign, strlen(flags->name));
      for (flags = orig; flags->name; ++flags)
-         _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
+         _debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
                      (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
                      flags->desc ? " " : "", flags->desc ? flags->desc : "");
   }
@@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,

   if (debug_get_option_should_print()) {
      if (str) {
-         debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+         debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
      } else {
-         debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+         debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
      }
   }

--- a/src/gallium/auxiliary/util/u_format_rgb9e5.h
+++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h
@@ -21,7 +21,8 @@
 * DEALINGS IN THE SOFTWARE.
 */

-/* Copied from EXT_texture_shared_exponent and edited. */
+/* Copied from EXT_texture_shared_exponent and edited, getting rid of
+ * expensive float math bits too. */

 #ifndef RGB9E5_H
 #define RGB9E5_H
@@ -39,7 +40,6 @@
 #define RGB9E5_MANTISSA_VALUES       (1<<RGB9E5_MANTISSA_BITS)
 #define MAX_RGB9E5_MANTISSA          (RGB9E5_MANTISSA_VALUES-1)
 #define MAX_RGB9E5                   (((float)MAX_RGB9E5_MANTISSA)/RGB9E5_MANTISSA_VALUES * (1<<MAX_RGB9E5_EXP))
-#define EPSILON_RGB9E5               ((1.0/RGB9E5_MANTISSA_VALUES) / (1<<RGB9E5_EXP_BIAS))

 typedef union {
   unsigned int raw;
@@ -74,63 +74,59 @@ typedef union {
   } field;
 } rgb9e5;

-static inline float rgb9e5_ClampRange(float x)
-{
-   if (x > 0.0f) {
-      if (x >= MAX_RGB9E5) {
-         return MAX_RGB9E5;
-      } else {
-         return x;
-      }
-   } else {
-      /* NaN gets here too since comparisons with NaN always fail! */
-      return 0.0;
-   }
-}

-/* Ok, FloorLog2 is not correct for the denorm and zero values, but we
-   are going to do a max of this value with the minimum rgb9e5 exponent
-   that will hide these problem cases. */
-static inline int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_ClampRange(float x)
 {
   float754 f;
-
+   float754 max;
   f.value = x;
-   return (f.field.biasedexponent - 127);
+   max.value = MAX_RGB9E5;
+
+   if (f.raw > 0x7f800000)
+  /* catches neg, NaNs */
+      return 0;
+   else if (f.raw >= max.raw)
+      return max.raw;
+   else
+      return f.raw;
 }

 static inline unsigned float3_to_rgb9e5(const float rgb[3])
 {
   rgb9e5 retval;
-   float maxrgb;
-   int rm, gm, bm;
-   float rc, gc, bc;
-   int exp_shared, maxm;
-   double denom;
+   int rm, gm, bm, exp_shared;
+   float754 revdenom = {0};
+   float754 rc, bc, gc, maxrgb;

-   rc = rgb9e5_ClampRange(rgb[0]);
-   gc = rgb9e5_ClampRange(rgb[1]);
-   bc = rgb9e5_ClampRange(rgb[2]);
+   rc.raw = rgb9e5_ClampRange(rgb[0]);
+   gc.raw = rgb9e5_ClampRange(rgb[1]);
+   bc.raw = rgb9e5_ClampRange(rgb[2]);
+   maxrgb.raw = MAX3(rc.raw, gc.raw, bc.raw);

-   maxrgb = MAX3(rc, gc, bc);
-   exp_shared = MAX2(-RGB9E5_EXP_BIAS-1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
+   /*
+    * Compared to what the spec suggests, instead of conditionally adjusting
+    * the exponent after the fact do it here by doing the equivalent of +0.5 -
+    * the int add will spill over into the exponent in this case.
+    */
+   maxrgb.raw += maxrgb.raw & (1 << (23-9));
+   exp_shared = MAX2((maxrgb.raw >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
+                1 + RGB9E5_EXP_BIAS - 127;
+   revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS -
+                                          RGB9E5_MANTISSA_BITS) + 1;
   assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   assert(exp_shared >= 0);
-   /* This exp2 function could be replaced by a table. */
-   denom = exp2(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);

-   maxm = (int) floor(maxrgb / denom + 0.5);
-   if (maxm == MAX_RGB9E5_MANTISSA+1) {
-      denom *= 2;
-      exp_shared += 1;
-      assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   } else {
-      assert(maxm <= MAX_RGB9E5_MANTISSA);
-   }
-
-   rm = (int) floor(rc / denom + 0.5);
-   gm = (int) floor(gc / denom + 0.5);
-   bm = (int) floor(bc / denom + 0.5);
+   /*
+    * The spec uses strict round-up behavior (d3d10 disagrees, but in any case
+    * must match what is done above for figuring out exponent).
+    * We avoid the doubles ((int) rc * revdenom + 0.5) by doing the rounding
+    * ourselves (revdenom was adjusted by +1, above).
+    */
+   rm = (int) (rc.value * revdenom.value);
+   gm = (int) (gc.value * revdenom.value);
+   bm = (int) (bc.value * revdenom.value);
+   rm = (rm & 1) + (rm >> 1);
+   gm = (gm & 1) + (gm >> 1);
+   bm = (bm & 1) + (bm >> 1);

   assert(rm <= MAX_RGB9E5_MANTISSA);
   assert(gm <= MAX_RGB9E5_MANTISSA);
@@ -151,15 +147,15 @@ static inline void rgb9e5_to_float3(unsigned rgb, float retval[3])
 {
   rgb9e5 v;
   int exponent;
-   float scale;
+   float754 scale = {0};

   v.raw = rgb;
   exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS;
-   scale = exp2f(exponent);
+   scale.field.biasedexponent = exponent + 127;

-   retval[0] = v.field.r * scale;
-   retval[1] = v.field.g * scale;
-   retval[2] = v.field.b * scale;
+   retval[0] = v.field.r * scale.value;
+   retval[1] = v.field.g * scale.value;
+   retval[2] = v.field.b * scale.value;
 }

 #endif
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -88,3 +88,18 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,

   *dst_count = util_last_bit(enabled_buffers);
 }
+
+
+void
+util_set_index_buffer(struct pipe_index_buffer *dst,
+                      const struct pipe_index_buffer *src)
+{
+   if (src) {
+      pipe_resource_reference(&dst->buffer, src->buffer);
+      memcpy(dst, src, sizeof(*dst));
+   }
+   else {
+      pipe_resource_reference(&dst->buffer, NULL);
+      memset(dst, 0, sizeof(*dst));
+   }
+}
--- a/src/gallium/auxiliary/util/u_helpers.h
+++ b/src/gallium/auxiliary/util/u_helpers.h
@@ -44,6 +44,9 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count);

+void util_set_index_buffer(struct pipe_index_buffer *dst,
+                           const struct pipe_index_buffer *src);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -389,6 +389,26 @@ unsigned ffs( unsigned u )
 #define ffs __builtin_ffs
 #endif

+#ifdef HAVE___BUILTIN_FFSLL
+#define ffsll __builtin_ffsll
+#else
+static inline int
+ffsll(long long int val)
+{
+   int bit;
+
+   bit = ffs((unsigned) (val & 0xffffffff));
+   if (bit != 0)
+      return bit;
+
+   bit = ffs((unsigned) (val >> 32));
+   if (bit != 0)
+      return 32 + bit;
+
+   return 0;
+}
+#endif
+
 #endif /* FFS_DEFINED */

 /**
@@ -483,6 +503,26 @@ u_bit_scan64(uint64_t *mask)
 }
 #endif

+/* For looping over a bitmask when you want to loop over consecutive bits
+ * manually, for example:
+ *
+ * while (mask) {
+ *    int start, count, i;
+ *
+ *    u_bit_scan_consecutive_range(&mask, &start, &count);
+ *
+ *    for (i = 0; i < count; i++)
+ *       ... process element (start+i)
+ * }
+ */
+static inline void
+u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
+{
+   *start = ffs(*mask) - 1;
+   *count = ffs(~(*mask >> *start)) - 1;
+   *mask &= ~(((1 << *count) - 1) << *start);
+}
+
 /**
 * Return float bits.
 */
--- a/src/gallium/auxiliary/util/u_prim_restart.c
+++ b/src/gallium/auxiliary/util/u_prim_restart.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "u_inlines.h"
+#include "u_memory.h"
+#include "u_prim_restart.h"
+
+
+/**
+ * Translate an index buffer for primitive restart.
+ * Create a new index buffer which is a copy of the original index buffer
+ * except that instances of 'restart_index' are converted to 0xffff or
+ * 0xffffffff.
+ * Also, index buffers using 1-byte indexes are converted to 2-byte indexes.
+ */
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index)
+{
+   struct pipe_screen *screen = context->screen;
+   struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL;
+   void *src_map = NULL, *dst_map = NULL;
+   const unsigned src_index_size = src_buffer->index_size;
+   unsigned dst_index_size;
+
+   /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */
+   dst_index_size = MAX2(2, src_buffer->index_size);
+   assert(dst_index_size == 2 || dst_index_size == 4);
+
+   /* no user buffers for now */
+   assert(src_buffer->user_buffer == NULL);
+
+   /* Create new index buffer */
+   *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER,
+                                    PIPE_USAGE_STREAM,
+                                    num_indexes * dst_index_size);
+   if (!*dst_buffer)
+      goto error;
+
+   /* Map new / dest index buffer */
+   dst_map = pipe_buffer_map(context, *dst_buffer,
+                             PIPE_TRANSFER_WRITE, &dst_transfer);
+   if (!dst_map)
+      goto error;
+
+   /* Map original / src index buffer */
+   src_map = pipe_buffer_map_range(context, src_buffer->buffer,
+                                   src_buffer->offset,
+                                   num_indexes * src_index_size,
+                                   PIPE_TRANSFER_READ,
+                                   &src_transfer);
+   if (!src_map)
+      goto error;
+
+   if (src_index_size == 1 && dst_index_size == 2) {
+      uint8_t *src = (uint8_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else if (src_index_size == 2 && dst_index_size == 2) {
+      uint16_t *src = (uint16_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else {
+      uint32_t *src = (uint32_t *) src_map;
+      uint32_t *dst = (uint32_t *) dst_map;
+      unsigned i;
+      assert(src_index_size == 4);
+      assert(dst_index_size == 4);
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffffffff : src[i];
+      }
+   }
+
+   pipe_buffer_unmap(context, src_transfer);
+   pipe_buffer_unmap(context, dst_transfer);
+
+   return PIPE_OK;
+
+error:
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+   if (dst_transfer)
+      pipe_buffer_unmap(context, dst_transfer);
+   if (*dst_buffer)
+      screen->resource_destroy(screen, *dst_buffer);
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/** Helper structs for util_draw_vbo_without_prim_restart() */
+
+struct range {
+   unsigned start, count;
+};
+
+struct range_info {
+   struct range *ranges;
+   unsigned count, max;
+};
+
+
+/**
+ * Helper function for util_draw_vbo_without_prim_restart()
+ * \return true for success, false if out of memory
+ */
+static boolean
+add_range(struct range_info *info, unsigned start, unsigned count)
+{
+   if (info->max == 0) {
+      info->max = 10;
+      info->ranges = MALLOC(info->max * sizeof(struct range));
+      if (!info->ranges) {
+         return FALSE;
+      }
+   }
+   else if (info->count == info->max) {
+      /* grow the ranges[] array */
+      info->ranges = REALLOC(info->ranges,
+                             info->max * sizeof(struct range),
+                             2 * info->max * sizeof(struct range));
+      if (!info->ranges) {
+         return FALSE;
+      }
+
+      info->max *= 2;
+   }
+
+   /* save the range */
+   info->ranges[info->count].start = start;
+   info->ranges[info->count].count = count;
+   info->count++;
+
+   return TRUE;
+}
+
+
+/**
+ * Implement primitive restart by breaking an indexed primitive into
+ * pieces which do not contain restart indexes.  Each piece is then
+ * drawn by calling pipe_context::draw_vbo().
+ * \return PIPE_OK if no error, an error code otherwise.
+ */
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info)
+{
+   const void *src_map;
+   struct range_info ranges = {0};
+   struct pipe_draw_info new_info;
+   struct pipe_transfer *src_transfer = NULL;
+   unsigned i, start, count;
+
+   assert(info->indexed);
+   assert(info->primitive_restart);
+
+   /* Get pointer to the index data */
+   if (ib->buffer) {
+      /* map the index buffer (only the range we need to scan) */
+      src_map = pipe_buffer_map_range(context, ib->buffer,
+                                      ib->offset + info->start * ib->index_size,
+                                      info->count * ib->index_size,
+                                      PIPE_TRANSFER_READ,
+                                      &src_transfer);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      if (!ib->user_buffer) {
+         debug_printf("User-space index buffer is null!");
+         return PIPE_ERROR_BAD_INPUT;
+      }
+      src_map = (const uint8_t *) ib->user_buffer
+         + ib->offset
+         + info->start * ib->index_size;
+   }
+
+#define SCAN_INDEXES(TYPE) \
+   for (i = 0; i <= info->count; i++) { \
+      if (i == info->count || \
+          ((const TYPE *) src_map)[i] == info->restart_index) { \
+         /* cut / restart */ \
+         if (count > 0) { \
+            if (!add_range(&ranges, info->start + start, count)) { \
+               if (src_transfer) \
+                  pipe_buffer_unmap(context, src_transfer); \
+               return PIPE_ERROR_OUT_OF_MEMORY; \
+            } \
+         } \
+         start = i + 1; \
+         count = 0; \
+      } \
+      else { \
+         count++; \
+      } \
+   }
+
+   start = info->start;
+   count = 0;
+   switch (ib->index_size) {
+   case 1:
+      SCAN_INDEXES(uint8_t);
+      break;
+   case 2:
+      SCAN_INDEXES(uint16_t);
+      break;
+   case 4:
+      SCAN_INDEXES(uint32_t);
+      break;
+   default:
+      assert(!"Bad index size");
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* unmap index buffer */
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+
+   /* draw ranges between the restart indexes */
+   new_info = *info;
+   new_info.primitive_restart = FALSE;
+   for (i = 0; i < ranges.count; i++) {
+      new_info.start = ranges.ranges[i].start;
+      new_info.count = ranges.ranges[i].count;
+      context->draw_vbo(context, &new_info);
+   }
+
+   FREE(ranges.ranges);
+
+   return PIPE_OK;
+}
--- a/src/gallium/auxiliary/util/u_prim_restart.h
+++ b/src/gallium/auxiliary/util/u_prim_restart.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_PRIM_RESTART_H
+#define U_PRIM_RESTART_H
+
+
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pipe_context;
+struct pipe_draw_info;
+struct pipe_index_buffer;
+struct pipe_resource;
+
+
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index);
+
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -339,7 +339,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
   /* KILL_IF -texTemp;   # if -texTemp < 0, kill fragment */
   tgsi_transform_kill_inst(ctx,
                            TGSI_FILE_TEMPORARY, texTemp,
-                            TGSI_SWIZZLE_W);
+                            TGSI_SWIZZLE_W, TRUE);
 }


--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -42,6 +42,7 @@ struct u_rect {
 };

 /* Do two rectangles intersect?
+ * Note: empty rectangles are valid as inputs (and never intersect).
 */
 static inline boolean
 u_rect_test_intersection(const struct u_rect *a,
@@ -50,7 +51,11 @@ u_rect_test_intersection(const struct u_rect *a,
   return (!(a->x1 < b->x0 ||
             b->x1 < a->x0 ||
             a->y1 < b->y0 ||
-             b->y1 < a->y0));
+             b->y1 < a->y0 ||
+             a->x1 < a->x0 ||
+             a->y1 < a->y0 ||
+             b->x1 < b->x0 ||
+             b->y1 < b->y0));
 }

 /* Find the intersection of two rectangles known to intersect.
@@ -82,7 +87,12 @@ u_rect_possible_intersection(const struct u_rect *a,
      u_rect_find_intersection(a,b);
   }
   else {
-      b->x0 = b->x1 = b->y0 = b->y1 = 0;
+      /*
+       * Note the u_rect_xx tests deal with inclusive coordinates
+       * hence all-zero would not be an empty box.
+       */
+      b->x0 = b->y0 = 0;
+      b->x1 = b->y1 = -1;
   }
 }

--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -831,3 +831,54 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,

   return ureg_create_shader_and_destroy(ureg, pipe);
 }
+
+void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes)
+{
+   static const unsigned zero[4] = {0, 0, 0, 0};
+
+   struct ureg_program *ureg;
+   struct ureg_dst dst[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src src[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_src imm;
+
+   unsigned i;
+
+   ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   if (ureg == NULL)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1);
+   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, 1);
+   imm = ureg_DECL_immediate_uint(ureg, zero, 4);
+
+   /**
+    * Loop over all the attribs and declare the corresponding
+    * declarations in the geometry shader
+    */
+   for (i = 0; i < num_attribs; i++) {
+      src[i] = ureg_DECL_input(ureg, semantic_names[i],
+                               semantic_indexes[i], 0, 1);
+      src[i] = ureg_src_dimension(src[i], 0);
+      dst[i] = ureg_DECL_output(ureg, semantic_names[i], semantic_indexes[i]);
+   }
+
+   /* MOV dst[i] src[i] */
+   for (i = 0; i < num_attribs; i++) {
+      ureg_MOV(ureg, dst[i], src[i]);
+   }
+
+   /* EMIT IMM[0] */
+   ureg_insn(ureg, TGSI_OPCODE_EMIT, NULL, 0, &imm, 1);
+
+   /* END */
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -146,6 +146,12 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                   unsigned tgsi_tex, unsigned nr_samples,
                                   enum tgsi_return_type stype);

+extern void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/util/u_string.h
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -199,6 +199,8 @@ util_memmove(void *dest, const void *src, size_t n)
 }


+#define util_strcasecmp stricmp
+
 #else

 #define util_vsnprintf vsnprintf
@@ -211,6 +213,7 @@ util_memmove(void *dest, const void *src, size_t n)
 #define util_strncat strncat
 #define util_strstr strstr
 #define util_memmove memmove
+#define util_strcasecmp strcasecmp

 #endif

--- a/src/gallium/auxiliary/util/u_tests.c
+++ b/src/gallium/auxiliary/util/u_tests.c
@@ -457,7 +457,7 @@ null_constant_buffer(struct pipe_context *ctx)
 void
 util_run_tests(struct pipe_screen *screen)
 {
-   struct pipe_context *ctx = screen->context_create(screen, NULL);
+   struct pipe_context *ctx = screen->context_create(screen, NULL, 0);

   tgsi_vs_window_space_position(ctx);
   null_sampler_view(ctx, TGSI_TEXTURE_2D);
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -129,9 +129,9 @@ void u_upload_destroy( struct u_upload_mgr *upload )
 }


-static enum pipe_error 
-u_upload_alloc_buffer( struct u_upload_mgr *upload,
-                       unsigned min_size )
+static void
+u_upload_alloc_buffer(struct u_upload_mgr *upload,
+                      unsigned min_size)
 {
   struct pipe_screen *screen = upload->pipe->screen;
   struct pipe_resource buffer;
@@ -161,9 +161,8 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
   }

   upload->buffer = screen->resource_create(screen, &buffer);
-   if (upload->buffer == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
-   }
+   if (upload->buffer == NULL)
+      return;

   /* Map the new buffer. */
   upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
@@ -172,52 +171,54 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
   if (upload->map == NULL) {
      upload->transfer = NULL;
      pipe_resource_reference(&upload->buffer, NULL);
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      return;
   }

   upload->offset = 0;
-   return PIPE_OK;
 }

-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr )
+void
+u_upload_alloc(struct u_upload_mgr *upload,
+               unsigned min_out_offset,
+               unsigned size,
+               unsigned *out_offset,
+               struct pipe_resource **outbuf,
+               void **ptr)
 {
-   unsigned alloc_size = align( size, upload->alignment );
+   unsigned alloc_size = align(size, upload->alignment);
   unsigned alloc_offset = align(min_out_offset, upload->alignment);
+   unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
   unsigned offset;

-   /* Init these return values here in case we fail below to make
-    * sure the caller doesn't get garbage values.
-    */
-   *out_offset = ~0;
-   pipe_resource_reference(outbuf, NULL);
-   *ptr = NULL;
-
   /* Make sure we have enough space in the upload buffer
    * for the sub-allocation. */
-   if (!upload->buffer ||
-       MAX2(upload->offset, alloc_offset) + alloc_size > upload->buffer->width0) {
-      enum pipe_error ret = u_upload_alloc_buffer(upload,
-                                                  alloc_offset + alloc_size);
-      if (ret != PIPE_OK)
-         return ret;
+   if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
+      u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
+
+      if (unlikely(!upload->buffer)) {
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
+         return;
+      }
+
+      buffer_size = upload->buffer->width0;
   }

   offset = MAX2(upload->offset, alloc_offset);

-   if (!upload->map) {
+   if (unlikely(!upload->map)) {
      upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
                                          offset,
-                                          upload->buffer->width0 - offset,
+                                          buffer_size - offset,
                                          upload->map_flags,
 					  &upload->transfer);
-      if (!upload->map) {
+      if (unlikely(!upload->map)) {
         upload->transfer = NULL;
-         return PIPE_ERROR_OUT_OF_MEMORY;
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
+         return;
      }

      upload->map -= offset;
@@ -229,46 +230,37 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,

   /* Emit the return values: */
   *ptr = upload->map + offset;
-   pipe_resource_reference( outbuf, upload->buffer );
+   pipe_resource_reference(outbuf, upload->buffer);
   *out_offset = offset;

   upload->offset = offset + alloc_size;
-   return PIPE_OK;
 }

-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf)
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf)
 {
   uint8_t *ptr;
-   enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size,
-                                        out_offset, outbuf,
-                                        (void**)&ptr);
-   if (ret != PIPE_OK)
-      return ret;

-   memcpy(ptr, data, size);
-   return PIPE_OK;
+   u_upload_alloc(upload, min_out_offset, size,
+                  out_offset, outbuf,
+                  (void**)&ptr);
+   if (ptr)
+      memcpy(ptr, data, size);
 }

-
-/* As above, but upload the full contents of a buffer.  Useful for
- * uploading user buffers, avoids generating an explosion of GPU
- * buffers if you have an app that does lots of small vertex buffer
- * renders or DrawElements calls.
- */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf)
+/* XXX: Remove. It's basically a CPU fallback of resource_copy_region. */
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf)
 {
-   enum pipe_error ret = PIPE_OK;
   struct pipe_transfer *transfer = NULL;
   const char *map = NULL;

@@ -279,20 +271,13 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
                                             &transfer);

   if (map == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      pipe_resource_reference(outbuf, NULL);
+      return;
   }

   if (0)
      debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);

-   ret = u_upload_data( upload,
-                        min_out_offset,
-                        size,
-                        map,
-                        out_offset,
-                        outbuf);
-
+   u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
   pipe_buffer_unmap( upload->pipe, transfer );
-
-   return ret;
 }
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -78,12 +78,12 @@ void u_upload_unmap( struct u_upload_mgr *upload );
 * \param outbuf           Pointer to where the upload buffer will be returned.
 * \param ptr              Pointer to the allocated memory that is returned.
 */
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr );
+void u_upload_alloc(struct u_upload_mgr *upload,
+                    unsigned min_out_offset,
+                    unsigned size,
+                    unsigned *out_offset,
+                    struct pipe_resource **outbuf,
+                    void **ptr);


 /**
@@ -92,12 +92,12 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
 * Same as u_upload_alloc, but in addition to that, it copies "data"
 * to the pointer returned from u_upload_alloc.
 */
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf);
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf);


 /**
@@ -106,13 +106,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
 * Same as u_upload_data, except that the input data comes from a buffer
 * instead of a user pointer.
 */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf);
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf);



--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -406,7 +406,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;
-   enum pipe_error err;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);
@@ -454,12 +453,12 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
      assert((ib->buffer || ib->user_buffer) && ib->index_size);

      /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader, 0,
-                           key->output_stride * num_indices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader, 0,
+                     key->output_stride * num_indices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;

      if (ib->user_buffer) {
         map = (uint8_t*)ib->user_buffer + offset;
@@ -486,13 +485,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
      }
   } else {
      /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader,
-                           key->output_stride * start_vertex,
-                           key->output_stride * num_vertices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader,
+                     key->output_stride * start_vertex,
+                     key->output_stride * num_vertices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

@@ -977,7 +976,6 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;
-      enum pipe_error err;

      i = u_bit_scan(&buffer_mask);

@@ -988,10 +986,10 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

-      err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
-                          &real_vb->buffer_offset, &real_vb->buffer);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_data(mgr->uploader, start, end - start, ptr + start,
+                    &real_vb->buffer_offset, &real_vb->buffer);
+      if (!real_vb->buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -1120,7 +1120,7 @@ vl_create_mpeg12_decoder(struct pipe_context *context,

   dec->base = *templat;
   dec->base.context = context;
-   dec->context = context->screen->context_create(context->screen, NULL);
+   dec->context = context->screen->context_create(context->screen, NULL, 0);

   dec->base.destroy = vl_mpeg12_destroy;
   dec->base.begin_frame = vl_mpeg12_begin_frame;
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -267,6 +267,16 @@ The integer capabilities:
 * ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
  bounds_max states of pipe_depth_stencil_alpha_state behave according
  to the GL_EXT_depth_bounds_test specification.
+* ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported
+* ``PIPE_CAP_FORCE_PERSAMPLE_INTERP``: If the driver can force per-sample
+  interpolation for all fragment shader inputs if
+  pipe_rasterizer_state::force_persample_interp is set. This is only used
+  by GL3-level sample shading (ARB_sample_shading). GL4-level sample shading
+  (ARB_gpu_shader5) doesn't use this. While GL3 hardware has a state for it,
+  GL4 hardware will likely need to emulate it with a shader variant, or by
+  selecting the interpolation weights with a conditional assignment
+  in the shader.
+


 .. _pipe_capf:
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong here.
  For components which don't return a resource dimension, their value
  is undefined.

-
 .. math::

  lod = src0.x
@@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong here.

  dst.w = texture\_levels(unit)

+
+.. opcode:: TXQS - Texture Samples Query
+
+  This retrieves the number of samples in the texture, and stores it
+  into the x component. The other components are undefined.
+
+.. math::
+
+  dst.x = texture\_samples(unit)
+
+
 .. opcode:: TG4 - Texture Gather

  As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
--- a/src/gallium/drivers/ddebug/Makefile.am
+++ b/src/gallium/drivers/ddebug/Makefile.am
@@ -0,0 +1,9 @@
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	$(GALLIUM_DRIVER_CFLAGS)
+
+noinst_LTLIBRARIES = libddebug.la
+
+libddebug_la_SOURCES = $(C_SOURCES)
--- a/src/gallium/drivers/ddebug/Makefile.sources
+++ b/src/gallium/drivers/ddebug/Makefile.sources
@@ -0,0 +1,6 @@
+C_SOURCES := \
+	dd_pipe.h \
+	dd_public.h \
+	dd_context.c \
+	dd_draw.c \
+	dd_screen.c
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -0,0 +1,771 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dd_pipe.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+
+static void
+safe_memcpy(void *dst, const void *src, size_t size)
+{
+   if (src)
+      memcpy(dst, src, size);
+   else
+      memset(dst, 0, size);
+}
+
+
+/********************************************************************
+ * queries
+ */
+
+static struct dd_query *
+dd_query(struct pipe_query *query)
+{
+   return (struct dd_query *)query;
+}
+
+static struct pipe_query *
+dd_query_unwrap(struct pipe_query *query)
+{
+   if (query) {
+      return dd_query(query)->query;
+   } else {
+      return NULL;
+   }
+}
+
+static struct pipe_query *
+dd_context_create_query(struct pipe_context *_pipe, unsigned query_type,
+                        unsigned index)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_query *query;
+
+   query = pipe->create_query(pipe, query_type, index);
+
+   /* Wrap query object. */
+   if (query) {
+      struct dd_query *dd_query = CALLOC_STRUCT(dd_query);
+      if (dd_query) {
+         dd_query->type = query_type;
+         dd_query->query = query;
+         query = (struct pipe_query *)dd_query;
+      } else {
+         pipe->destroy_query(pipe, query);
+         query = NULL;
+      }
+   }
+
+   return query;
+}
+
+static void
+dd_context_destroy_query(struct pipe_context *_pipe,
+                         struct pipe_query *query)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->destroy_query(pipe, dd_query_unwrap(query));
+   FREE(query);
+}
+
+static boolean
+dd_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   return pipe->begin_query(pipe, dd_query_unwrap(query));
+}
+
+static void
+dd_context_end_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   pipe->end_query(pipe, dd_query_unwrap(query));
+}
+
+static boolean
+dd_context_get_query_result(struct pipe_context *_pipe,
+                            struct pipe_query *query, boolean wait,
+                            union pipe_query_result *result)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   return pipe->get_query_result(pipe, dd_query_unwrap(query), wait, result);
+}
+
+static void
+dd_context_render_condition(struct pipe_context *_pipe,
+                            struct pipe_query *query, boolean condition,
+                            uint mode)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   pipe->render_condition(pipe, dd_query_unwrap(query), condition, mode);
+   dctx->render_cond.query = dd_query(query);
+   dctx->render_cond.condition = condition;
+   dctx->render_cond.mode = mode;
+}
+
+
+/********************************************************************
+ * constant (immutable) non-shader states
+ */
+
+#define DD_CSO_CREATE(name, shortname) \
+   static void * \
+   dd_context_create_##name##_state(struct pipe_context *_pipe, \
+                                    const struct pipe_##name##_state *state) \
+   { \
+      struct pipe_context *pipe = dd_context(_pipe)->pipe; \
+      struct dd_state *hstate = CALLOC_STRUCT(dd_state); \
+ \
+      if (!hstate) \
+         return NULL; \
+      hstate->cso = pipe->create_##name##_state(pipe, state); \
+      hstate->state.shortname = *state; \
+      return hstate; \
+   }
+
+#define DD_CSO_BIND(name, shortname) \
+   static void \
+   dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+ \
+      dctx->shortname = hstate; \
+      pipe->bind_##name##_state(pipe, hstate ? hstate->cso : NULL); \
+   }
+
+#define DD_CSO_DELETE(name) \
+   static void \
+   dd_context_delete_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+ \
+      pipe->delete_##name##_state(pipe, hstate->cso); \
+      FREE(hstate); \
+   }
+
+#define DD_CSO_WHOLE(name, shortname) \
+   DD_CSO_CREATE(name, shortname) \
+   DD_CSO_BIND(name, shortname) \
+   DD_CSO_DELETE(name)
+
+DD_CSO_WHOLE(blend, blend)
+DD_CSO_WHOLE(rasterizer, rs)
+DD_CSO_WHOLE(depth_stencil_alpha, dsa)
+
+DD_CSO_CREATE(sampler, sampler)
+DD_CSO_DELETE(sampler)
+
+static void
+dd_context_bind_sampler_states(struct pipe_context *_pipe, unsigned shader,
+                               unsigned start, unsigned count, void **states)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   memcpy(&dctx->sampler_states[shader][start], states,
+          sizeof(void*) * count);
+
+   if (states) {
+      void *samp[PIPE_MAX_SAMPLERS];
+      int i;
+
+      for (i = 0; i < count; i++) {
+         struct dd_state *s = states[i];
+         samp[i] = s ? s->cso : NULL;
+      }
+
+      pipe->bind_sampler_states(pipe, shader, start, count, samp);
+   }
+   else
+      pipe->bind_sampler_states(pipe, shader, start, count, NULL);
+}
+
+static void *
+dd_context_create_vertex_elements_state(struct pipe_context *_pipe,
+                                        unsigned num_elems,
+                                        const struct pipe_vertex_element *elems)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct dd_state *hstate = CALLOC_STRUCT(dd_state);
+
+   if (!hstate)
+      return NULL;
+   hstate->cso = pipe->create_vertex_elements_state(pipe, num_elems, elems);
+   memcpy(hstate->state.velems.velems, elems, sizeof(elems[0]) * num_elems);
+   hstate->state.velems.count = num_elems;
+   return hstate;
+}
+
+DD_CSO_BIND(vertex_elements, velems)
+DD_CSO_DELETE(vertex_elements)
+
+
+/********************************************************************
+ * shaders
+ */
+
+#define DD_SHADER(NAME, name) \
+   static void * \
+   dd_context_create_##name##_state(struct pipe_context *_pipe, \
+                                    const struct pipe_shader_state *state) \
+   { \
+      struct pipe_context *pipe = dd_context(_pipe)->pipe; \
+      struct dd_state *hstate = CALLOC_STRUCT(dd_state); \
+ \
+      if (!hstate) \
+         return NULL; \
+      hstate->cso = pipe->create_##name##_state(pipe, state); \
+      hstate->state.shader = *state; \
+      hstate->state.shader.tokens = tgsi_dup_tokens(state->tokens); \
+      return hstate; \
+   } \
+    \
+   static void \
+   dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+   \
+      dctx->shaders[PIPE_SHADER_##NAME] = hstate; \
+      pipe->bind_##name##_state(pipe, hstate ? hstate->cso : NULL); \
+   } \
+    \
+   static void \
+   dd_context_delete_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+   \
+      pipe->delete_##name##_state(pipe, hstate->cso); \
+      tgsi_free_tokens(hstate->state.shader.tokens); \
+      FREE(hstate); \
+   }
+
+DD_SHADER(FRAGMENT, fs)
+DD_SHADER(VERTEX, vs)
+DD_SHADER(GEOMETRY, gs)
+DD_SHADER(TESS_CTRL, tcs)
+DD_SHADER(TESS_EVAL, tes)
+
+
+/********************************************************************
+ * immediate states
+ */
+
+#define DD_IMM_STATE(name, type, deref, ref) \
+   static void \
+   dd_context_set_##name(struct pipe_context *_pipe, type deref) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+ \
+      dctx->name = deref; \
+      pipe->set_##name(pipe, ref); \
+   }
+
+DD_IMM_STATE(blend_color, const struct pipe_blend_color, *state, state)
+DD_IMM_STATE(stencil_ref, const struct pipe_stencil_ref, *state, state)
+DD_IMM_STATE(clip_state, const struct pipe_clip_state, *state, state)
+DD_IMM_STATE(sample_mask, unsigned, sample_mask, sample_mask)
+DD_IMM_STATE(min_samples, unsigned, min_samples, min_samples)
+DD_IMM_STATE(framebuffer_state, const struct pipe_framebuffer_state, *state, state)
+DD_IMM_STATE(polygon_stipple, const struct pipe_poly_stipple, *state, state)
+
+static void
+dd_context_set_constant_buffer(struct pipe_context *_pipe,
+                               uint shader, uint index,
+                               struct pipe_constant_buffer *constant_buffer)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->constant_buffers[shader][index], constant_buffer,
+          sizeof(*constant_buffer));
+   pipe->set_constant_buffer(pipe, shader, index, constant_buffer);
+}
+
+static void
+dd_context_set_scissor_states(struct pipe_context *_pipe,
+                              unsigned start_slot, unsigned num_scissors,
+                              const struct pipe_scissor_state *states)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->scissors[start_slot], states,
+               sizeof(*states) * num_scissors);
+   pipe->set_scissor_states(pipe, start_slot, num_scissors, states);
+}
+
+static void
+dd_context_set_viewport_states(struct pipe_context *_pipe,
+                               unsigned start_slot, unsigned num_viewports,
+                               const struct pipe_viewport_state *states)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->viewports[start_slot], states,
+               sizeof(*states) * num_viewports);
+   pipe->set_viewport_states(pipe, start_slot, num_viewports, states);
+}
+
+static void dd_context_set_tess_state(struct pipe_context *_pipe,
+                                      const float default_outer_level[4],
+                                      const float default_inner_level[2])
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   memcpy(dctx->tess_default_levels, default_outer_level, sizeof(float) * 4);
+   memcpy(dctx->tess_default_levels+4, default_inner_level, sizeof(float) * 2);
+   pipe->set_tess_state(pipe, default_outer_level, default_inner_level);
+}
+
+
+/********************************************************************
+ * views
+ */
+
+static struct pipe_surface *
+dd_context_create_surface(struct pipe_context *_pipe,
+                          struct pipe_resource *resource,
+                          const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_surface *view =
+      pipe->create_surface(pipe, resource, surf_tmpl);
+
+   if (!view)
+      return NULL;
+   view->context = _pipe;
+   return view;
+}
+
+static void
+dd_context_surface_destroy(struct pipe_context *_pipe,
+                           struct pipe_surface *surf)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->surface_destroy(pipe, surf);
+}
+
+static struct pipe_sampler_view *
+dd_context_create_sampler_view(struct pipe_context *_pipe,
+                               struct pipe_resource *resource,
+                               const struct pipe_sampler_view *templ)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_sampler_view *view =
+      pipe->create_sampler_view(pipe, resource, templ);
+
+   if (!view)
+      return NULL;
+   view->context = _pipe;
+   return view;
+}
+
+static void
+dd_context_sampler_view_destroy(struct pipe_context *_pipe,
+                                struct pipe_sampler_view *view)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->sampler_view_destroy(pipe, view);
+}
+
+static struct pipe_image_view *
+dd_context_create_image_view(struct pipe_context *_pipe,
+                             struct pipe_resource *resource,
+                             const struct pipe_image_view *templ)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_image_view *view =
+      pipe->create_image_view(pipe, resource, templ);
+
+   if (!view)
+      return NULL;
+   view->context = _pipe;
+   return view;
+}
+
+static void
+dd_context_image_view_destroy(struct pipe_context *_pipe,
+                              struct pipe_image_view *view)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->image_view_destroy(pipe, view);
+}
+
+static struct pipe_stream_output_target *
+dd_context_create_stream_output_target(struct pipe_context *_pipe,
+                                       struct pipe_resource *res,
+                                       unsigned buffer_offset,
+                                       unsigned buffer_size)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_stream_output_target *view =
+      pipe->create_stream_output_target(pipe, res, buffer_offset,
+                                        buffer_size);
+
+   if (!view)
+      return NULL;
+   view->context = _pipe;
+   return view;
+}
+
+static void
+dd_context_stream_output_target_destroy(struct pipe_context *_pipe,
+                                        struct pipe_stream_output_target *target)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->stream_output_target_destroy(pipe, target);
+}
+
+
+/********************************************************************
+ * set states
+ */
+
+static void
+dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader,
+                             unsigned start, unsigned num,
+                             struct pipe_sampler_view **views)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->sampler_views[shader][start], views,
+               sizeof(views[0]) * num);
+   pipe->set_sampler_views(pipe, shader, start, num, views);
+}
+
+static void
+dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
+                             unsigned start, unsigned num,
+                             struct pipe_image_view **views)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->shader_images[shader][start], views,
+               sizeof(views[0]) * num);
+   pipe->set_shader_images(pipe, shader, start, num, views);
+}
+
+static void
+dd_context_set_shader_buffers(struct pipe_context *_pipe, unsigned shader,
+                              unsigned start, unsigned num_buffers,
+                              struct pipe_shader_buffer *buffers)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->shader_buffers[shader][start], buffers,
+               sizeof(buffers[0]) * num_buffers);
+   pipe->set_shader_buffers(pipe, shader, start, num_buffers, buffers);
+}
+
+static void
+dd_context_set_vertex_buffers(struct pipe_context *_pipe,
+                              unsigned start, unsigned num_buffers,
+                              const struct pipe_vertex_buffer *buffers)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->vertex_buffers[start], buffers,
+               sizeof(buffers[0]) * num_buffers);
+   pipe->set_vertex_buffers(pipe, start, num_buffers, buffers);
+}
+
+static void
+dd_context_set_index_buffer(struct pipe_context *_pipe,
+                            const struct pipe_index_buffer *ib)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   safe_memcpy(&dctx->index_buffer, ib, sizeof(*ib));
+   pipe->set_index_buffer(pipe, ib);
+}
+
+static void
+dd_context_set_stream_output_targets(struct pipe_context *_pipe,
+                                     unsigned num_targets,
+                                     struct pipe_stream_output_target **tgs,
+                                     const unsigned *offsets)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   dctx->num_so_targets = num_targets;
+   safe_memcpy(dctx->so_targets, tgs, sizeof(*tgs) * num_targets);
+   safe_memcpy(dctx->so_offsets, offsets, sizeof(*offsets) * num_targets);
+   pipe->set_stream_output_targets(pipe, num_targets, tgs, offsets);
+}
+
+static void
+dd_context_destroy(struct pipe_context *_pipe)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   pipe->destroy(pipe);
+   FREE(dctx);
+}
+
+
+/********************************************************************
+ * transfer
+ */
+
+static void *
+dd_context_transfer_map(struct pipe_context *_pipe,
+                        struct pipe_resource *resource, unsigned level,
+                        unsigned usage, const struct pipe_box *box,
+                        struct pipe_transfer **transfer)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   return pipe->transfer_map(pipe, resource, level, usage, box, transfer);
+}
+
+static void
+dd_context_transfer_flush_region(struct pipe_context *_pipe,
+                                 struct pipe_transfer *transfer,
+                                 const struct pipe_box *box)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->transfer_flush_region(pipe, transfer, box);
+}
+
+static void
+dd_context_transfer_unmap(struct pipe_context *_pipe,
+                          struct pipe_transfer *transfer)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->transfer_unmap(pipe, transfer);
+}
+
+static void
+dd_context_transfer_inline_write(struct pipe_context *_pipe,
+                                 struct pipe_resource *resource,
+                                 unsigned level, unsigned usage,
+                                 const struct pipe_box *box,
+                                 const void *data, unsigned stride,
+                                 unsigned layer_stride)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->transfer_inline_write(pipe, resource, level, usage, box, data,
+                               stride, layer_stride);
+}
+
+
+/********************************************************************
+ * miscellaneous
+ */
+
+static void
+dd_context_texture_barrier(struct pipe_context *_pipe)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->texture_barrier(pipe);
+}
+
+static void
+dd_context_memory_barrier(struct pipe_context *_pipe, unsigned flags)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->memory_barrier(pipe, flags);
+}
+
+static void
+dd_context_get_sample_position(struct pipe_context *_pipe,
+                               unsigned sample_count, unsigned sample_index,
+                               float *out_value)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   return pipe->get_sample_position(pipe, sample_count, sample_index,
+                                    out_value);
+}
+
+static void
+dd_context_invalidate_resource(struct pipe_context *_pipe,
+                               struct pipe_resource *resource)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->invalidate_resource(pipe, resource);
+}
+
+static enum pipe_reset_status
+dd_context_get_device_reset_status(struct pipe_context *_pipe)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   return pipe->get_device_reset_status(pipe);
+}
+
+static void
+dd_context_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
+                            unsigned flags)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   return pipe->dump_debug_state(pipe, stream, flags);
+}
+
+struct pipe_context *
+dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
+{
+   struct dd_context *dctx;
+
+   if (!pipe)
+      return NULL;
+
+   dctx = CALLOC_STRUCT(dd_context);
+   if (!dctx) {
+      pipe->destroy(pipe);
+      return NULL;
+   }
+
+   dctx->pipe = pipe;
+   dctx->base.priv = pipe->priv; /* expose wrapped priv data */
+   dctx->base.screen = &dscreen->base;
+
+   dctx->base.destroy = dd_context_destroy;
+
+   CTX_INIT(render_condition);
+   CTX_INIT(create_query);
+   CTX_INIT(destroy_query);
+   CTX_INIT(begin_query);
+   CTX_INIT(end_query);
+   CTX_INIT(get_query_result);
+   CTX_INIT(create_blend_state);
+   CTX_INIT(bind_blend_state);
+   CTX_INIT(delete_blend_state);
+   CTX_INIT(create_sampler_state);
+   CTX_INIT(bind_sampler_states);
+   CTX_INIT(delete_sampler_state);
+   CTX_INIT(create_rasterizer_state);
+   CTX_INIT(bind_rasterizer_state);
+   CTX_INIT(delete_rasterizer_state);
+   CTX_INIT(create_depth_stencil_alpha_state);
+   CTX_INIT(bind_depth_stencil_alpha_state);
+   CTX_INIT(delete_depth_stencil_alpha_state);
+   CTX_INIT(create_fs_state);
+   CTX_INIT(bind_fs_state);
+   CTX_INIT(delete_fs_state);
+   CTX_INIT(create_vs_state);
+   CTX_INIT(bind_vs_state);
+   CTX_INIT(delete_vs_state);
+   CTX_INIT(create_gs_state);
+   CTX_INIT(bind_gs_state);
+   CTX_INIT(delete_gs_state);
+   CTX_INIT(create_tcs_state);
+   CTX_INIT(bind_tcs_state);
+   CTX_INIT(delete_tcs_state);
+   CTX_INIT(create_tes_state);
+   CTX_INIT(bind_tes_state);
+   CTX_INIT(delete_tes_state);
+   CTX_INIT(create_vertex_elements_state);
+   CTX_INIT(bind_vertex_elements_state);
+   CTX_INIT(delete_vertex_elements_state);
+   CTX_INIT(set_blend_color);
+   CTX_INIT(set_stencil_ref);
+   CTX_INIT(set_sample_mask);
+   CTX_INIT(set_min_samples);
+   CTX_INIT(set_clip_state);
+   CTX_INIT(set_constant_buffer);
+   CTX_INIT(set_framebuffer_state);
+   CTX_INIT(set_polygon_stipple);
+   CTX_INIT(set_scissor_states);
+   CTX_INIT(set_viewport_states);
+   CTX_INIT(set_sampler_views);
+   CTX_INIT(set_tess_state);
+   CTX_INIT(set_shader_buffers);
+   CTX_INIT(set_shader_images);
+   CTX_INIT(set_vertex_buffers);
+   CTX_INIT(set_index_buffer);
+   CTX_INIT(create_stream_output_target);
+   CTX_INIT(stream_output_target_destroy);
+   CTX_INIT(set_stream_output_targets);
+   CTX_INIT(create_sampler_view);
+   CTX_INIT(sampler_view_destroy);
+   CTX_INIT(create_surface);
+   CTX_INIT(surface_destroy);
+   CTX_INIT(create_image_view);
+   CTX_INIT(image_view_destroy);
+   CTX_INIT(transfer_map);
+   CTX_INIT(transfer_flush_region);
+   CTX_INIT(transfer_unmap);
+   CTX_INIT(transfer_inline_write);
+   CTX_INIT(texture_barrier);
+   CTX_INIT(memory_barrier);
+   /* create_video_codec */
+   /* create_video_buffer */
+   /* create_compute_state */
+   /* bind_compute_state */
+   /* delete_compute_state */
+   /* set_compute_resources */
+   /* set_global_binding */
+   CTX_INIT(get_sample_position);
+   CTX_INIT(invalidate_resource);
+   CTX_INIT(get_device_reset_status);
+   CTX_INIT(dump_debug_state);
+
+   dd_init_draw_functions(dctx);
+
+   dctx->sample_mask = ~0;
+   return &dctx->base;
+}
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -0,0 +1,784 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dd_pipe.h"
+
+#include "util/u_dump.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_scan.h"
+
+
+enum call_type
+{
+   CALL_DRAW_VBO,
+   CALL_RESOURCE_COPY_REGION,
+   CALL_BLIT,
+   CALL_FLUSH_RESOURCE,
+   CALL_CLEAR,
+   CALL_CLEAR_BUFFER,
+   CALL_CLEAR_RENDER_TARGET,
+   CALL_CLEAR_DEPTH_STENCIL,
+};
+
+struct call_resource_copy_region
+{
+   struct pipe_resource *dst;
+   unsigned dst_level;
+   unsigned dstx, dsty, dstz;
+   struct pipe_resource *src;
+   unsigned src_level;
+   const struct pipe_box *src_box;
+};
+
+struct call_clear
+{
+   unsigned buffers;
+   const union pipe_color_union *color;
+   double depth;
+   unsigned stencil;
+};
+
+struct call_clear_buffer
+{
+   struct pipe_resource *res;
+   unsigned offset;
+   unsigned size;
+   const void *clear_value;
+   int clear_value_size;
+};
+
+struct dd_call
+{
+   enum call_type type;
+
+   union {
+      struct pipe_draw_info draw_vbo;
+      struct call_resource_copy_region resource_copy_region;
+      struct pipe_blit_info blit;
+      struct pipe_resource *flush_resource;
+      struct call_clear clear;
+      struct call_clear_buffer clear_buffer;
+   } info;
+};
+
+static FILE *
+dd_get_file_stream(struct dd_context *dctx)
+{
+   struct pipe_screen *screen = dctx->pipe->screen;
+   FILE *f = dd_get_debug_file();
+   if (!f)
+      return NULL;
+
+   fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
+   fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
+   fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
+   return f;
+}
+
+static void
+dd_close_file_stream(FILE *f)
+{
+   fclose(f);
+}
+
+static unsigned
+dd_num_active_viewports(struct dd_context *dctx)
+{
+   struct tgsi_shader_info info;
+   const struct tgsi_token *tokens;
+
+   if (dctx->shaders[PIPE_SHADER_GEOMETRY])
+      tokens = dctx->shaders[PIPE_SHADER_GEOMETRY]->state.shader.tokens;
+   else if (dctx->shaders[PIPE_SHADER_TESS_EVAL])
+      tokens = dctx->shaders[PIPE_SHADER_TESS_EVAL]->state.shader.tokens;
+   else if (dctx->shaders[PIPE_SHADER_VERTEX])
+      tokens = dctx->shaders[PIPE_SHADER_VERTEX]->state.shader.tokens;
+   else
+      return 1;
+
+   tgsi_scan_shader(tokens, &info);
+   return info.writes_viewport_index ? PIPE_MAX_VIEWPORTS : 1;
+}
+
+#define COLOR_RESET	"\033[0m"
+#define COLOR_SHADER	"\033[1;32m"
+#define COLOR_STATE	"\033[1;33m"
+
+#define DUMP(name, var) do { \
+   fprintf(f, COLOR_STATE #name ": " COLOR_RESET); \
+   util_dump_##name(f, var); \
+   fprintf(f, "\n"); \
+} while(0)
+
+#define DUMP_I(name, var, i) do { \
+   fprintf(f, COLOR_STATE #name " %i: " COLOR_RESET, i); \
+   util_dump_##name(f, var); \
+   fprintf(f, "\n"); \
+} while(0)
+
+#define DUMP_M(name, var, member) do { \
+   fprintf(f, "  " #member ": "); \
+   util_dump_##name(f, (var)->member); \
+   fprintf(f, "\n"); \
+} while(0)
+
+#define DUMP_M_ADDR(name, var, member) do { \
+   fprintf(f, "  " #member ": "); \
+   util_dump_##name(f, &(var)->member); \
+   fprintf(f, "\n"); \
+} while(0)
+
+static void
+print_named_value(FILE *f, const char *name, int value)
+{
+   fprintf(f, COLOR_STATE "%s" COLOR_RESET " = %i\n", name, value);
+}
+
+static void
+print_named_xvalue(FILE *f, const char *name, int value)
+{
+   fprintf(f, COLOR_STATE "%s" COLOR_RESET " = 0x%08x\n", name, value);
+}
+
+static void
+util_dump_uint(FILE *f, unsigned i)
+{
+   fprintf(f, "%u", i);
+}
+
+static void
+util_dump_hex(FILE *f, unsigned i)
+{
+   fprintf(f, "0x%x", i);
+}
+
+static void
+util_dump_double(FILE *f, double d)
+{
+   fprintf(f, "%f", d);
+}
+
+static void
+util_dump_format(FILE *f, enum pipe_format format)
+{
+   fprintf(f, "%s", util_format_name(format));
+}
+
+static void
+util_dump_color_union(FILE *f, const union pipe_color_union *color)
+{
+   fprintf(f, "{f = {%f, %f, %f, %f}, ui = {%u, %u, %u, %u}",
+           color->f[0], color->f[1], color->f[2], color->f[3],
+           color->ui[0], color->ui[1], color->ui[2], color->ui[3]);
+}
+
+static void
+util_dump_query(FILE *f, struct dd_query *query)
+{
+   if (query->type >= PIPE_QUERY_DRIVER_SPECIFIC)
+      fprintf(f, "PIPE_QUERY_DRIVER_SPECIFIC + %i",
+              query->type - PIPE_QUERY_DRIVER_SPECIFIC);
+   else
+      fprintf(f, "%s", util_dump_query_type(query->type, false));
+}
+
+static void
+dd_dump_render_condition(struct dd_context *dctx, FILE *f)
+{
+   if (dctx->render_cond.query) {
+      fprintf(f, "render condition:\n");
+      DUMP_M(query, &dctx->render_cond, query);
+      DUMP_M(uint, &dctx->render_cond, condition);
+      DUMP_M(uint, &dctx->render_cond, mode);
+      fprintf(f, "\n");
+   }
+}
+
+static void
+dd_dump_draw_vbo(struct dd_context *dctx, struct pipe_draw_info *info, FILE *f)
+{
+   int sh, i;
+   const char *shader_str[PIPE_SHADER_TYPES];
+
+   shader_str[PIPE_SHADER_VERTEX] = "VERTEX";
+   shader_str[PIPE_SHADER_TESS_CTRL] = "TESS_CTRL";
+   shader_str[PIPE_SHADER_TESS_EVAL] = "TESS_EVAL";
+   shader_str[PIPE_SHADER_GEOMETRY] = "GEOMETRY";
+   shader_str[PIPE_SHADER_FRAGMENT] = "FRAGMENT";
+   shader_str[PIPE_SHADER_COMPUTE] = "COMPUTE";
+
+   DUMP(draw_info, info);
+   if (info->indexed) {
+      DUMP(index_buffer, &dctx->index_buffer);
+      if (dctx->index_buffer.buffer)
+         DUMP_M(resource, &dctx->index_buffer, buffer);
+   }
+   if (info->count_from_stream_output)
+      DUMP_M(stream_output_target, info,
+             count_from_stream_output);
+   if (info->indirect)
+      DUMP_M(resource, info, indirect);
+   fprintf(f, "\n");
+
+   /* TODO: dump active queries */
+
+   dd_dump_render_condition(dctx, f);
+
+   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
+      if (dctx->vertex_buffers[i].buffer ||
+          dctx->vertex_buffers[i].user_buffer) {
+         DUMP_I(vertex_buffer, &dctx->vertex_buffers[i], i);
+         if (dctx->vertex_buffers[i].buffer)
+            DUMP_M(resource, &dctx->vertex_buffers[i], buffer);
+      }
+
+   if (dctx->velems) {
+      print_named_value(f, "num vertex elements",
+                        dctx->velems->state.velems.count);
+      for (i = 0; i < dctx->velems->state.velems.count; i++) {
+         fprintf(f, "  ");
+         DUMP_I(vertex_element, &dctx->velems->state.velems.velems[i], i);
+      }
+   }
+
+   print_named_value(f, "num stream output targets", dctx->num_so_targets);
+   for (i = 0; i < dctx->num_so_targets; i++)
+      if (dctx->so_targets[i]) {
+         DUMP_I(stream_output_target, dctx->so_targets[i], i);
+         DUMP_M(resource, dctx->so_targets[i], buffer);
+         fprintf(f, "  offset = %i\n", dctx->so_offsets[i]);
+      }
+
+   fprintf(f, "\n");
+   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+      if (sh == PIPE_SHADER_COMPUTE)
+         continue;
+
+      if (sh == PIPE_SHADER_TESS_CTRL &&
+          !dctx->shaders[PIPE_SHADER_TESS_CTRL] &&
+          dctx->shaders[PIPE_SHADER_TESS_EVAL])
+         fprintf(f, "tess_state: {default_outer_level = {%f, %f, %f, %f}, "
+                 "default_inner_level = {%f, %f}}\n",
+                 dctx->tess_default_levels[0],
+                 dctx->tess_default_levels[1],
+                 dctx->tess_default_levels[2],
+                 dctx->tess_default_levels[3],
+                 dctx->tess_default_levels[4],
+                 dctx->tess_default_levels[5]);
+
+      if (sh == PIPE_SHADER_FRAGMENT)
+         if (dctx->rs) {
+            unsigned num_viewports = dd_num_active_viewports(dctx);
+
+            if (dctx->rs->state.rs.clip_plane_enable)
+               DUMP(clip_state, &dctx->clip_state);
+
+            for (i = 0; i < num_viewports; i++)
+               DUMP_I(viewport_state, &dctx->viewports[i], i);
+
+            if (dctx->rs->state.rs.scissor)
+               for (i = 0; i < num_viewports; i++)
+                  DUMP_I(scissor_state, &dctx->scissors[i], i);
+
+            DUMP(rasterizer_state, &dctx->rs->state.rs);
+
+            if (dctx->rs->state.rs.poly_stipple_enable)
+               DUMP(poly_stipple, &dctx->polygon_stipple);
+            fprintf(f, "\n");
+         }
+
+      if (!dctx->shaders[sh])
+         continue;
+
+      fprintf(f, COLOR_SHADER "begin shader: %s" COLOR_RESET "\n", shader_str[sh]);
+      DUMP(shader_state, &dctx->shaders[sh]->state.shader);
+
+      for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++)
+         if (dctx->constant_buffers[sh][i].buffer ||
+             dctx->constant_buffers[sh][i].user_buffer) {
+            DUMP_I(constant_buffer, &dctx->constant_buffers[sh][i], i);
+            if (dctx->constant_buffers[sh][i].buffer)
+               DUMP_M(resource, &dctx->constant_buffers[sh][i], buffer);
+         }
+
+      for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+         if (dctx->sampler_states[sh][i])
+            DUMP_I(sampler_state, &dctx->sampler_states[sh][i]->state.sampler, i);
+
+      for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+         if (dctx->sampler_views[sh][i]) {
+            DUMP_I(sampler_view, dctx->sampler_views[sh][i], i);
+            DUMP_M(resource, dctx->sampler_views[sh][i], texture);
+         }
+
+      /* TODO: print shader images */
+      /* TODO: print shader buffers */
+
+      fprintf(f, COLOR_SHADER "end shader: %s" COLOR_RESET "\n\n", shader_str[sh]);
+   }
+
+   if (dctx->dsa)
+      DUMP(depth_stencil_alpha_state, &dctx->dsa->state.dsa);
+   DUMP(stencil_ref, &dctx->stencil_ref);
+
+   if (dctx->blend)
+      DUMP(blend_state, &dctx->blend->state.blend);
+   DUMP(blend_color, &dctx->blend_color);
+
+   print_named_value(f, "min_samples", dctx->min_samples);
+   print_named_xvalue(f, "sample_mask", dctx->sample_mask);
+   fprintf(f, "\n");
+
+   DUMP(framebuffer_state, &dctx->framebuffer_state);
+   for (i = 0; i < dctx->framebuffer_state.nr_cbufs; i++)
+      if (dctx->framebuffer_state.cbufs[i]) {
+         fprintf(f, "  " COLOR_STATE "cbufs[%i]:" COLOR_RESET "\n    ", i);
+         DUMP(surface, dctx->framebuffer_state.cbufs[i]);
+         fprintf(f, "    ");
+         DUMP(resource, dctx->framebuffer_state.cbufs[i]->texture);
+      }
+   if (dctx->framebuffer_state.zsbuf) {
+      fprintf(f, "  " COLOR_STATE "zsbuf:" COLOR_RESET "\n    ");
+      DUMP(surface, dctx->framebuffer_state.zsbuf);
+      fprintf(f, "    ");
+      DUMP(resource, dctx->framebuffer_state.zsbuf->texture);
+   }
+   fprintf(f, "\n");
+}
+
+static void
+dd_dump_resource_copy_region(struct dd_context *dctx,
+                             struct call_resource_copy_region *info,
+                             FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, dst);
+   DUMP_M(uint, info, dst_level);
+   DUMP_M(uint, info, dstx);
+   DUMP_M(uint, info, dsty);
+   DUMP_M(uint, info, dstz);
+   DUMP_M(resource, info, src);
+   DUMP_M(uint, info, src_level);
+   DUMP_M(box, info, src_box);
+}
+
+static void
+dd_dump_blit(struct dd_context *dctx, struct pipe_blit_info *info, FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, dst.resource);
+   DUMP_M(uint, info, dst.level);
+   DUMP_M_ADDR(box, info, dst.box);
+   DUMP_M(format, info, dst.format);
+
+   DUMP_M(resource, info, src.resource);
+   DUMP_M(uint, info, src.level);
+   DUMP_M_ADDR(box, info, src.box);
+   DUMP_M(format, info, src.format);
+
+   DUMP_M(hex, info, mask);
+   DUMP_M(uint, info, filter);
+   DUMP_M(uint, info, scissor_enable);
+   DUMP_M_ADDR(scissor_state, info, scissor);
+   DUMP_M(uint, info, render_condition_enable);
+
+   if (info->render_condition_enable)
+      dd_dump_render_condition(dctx, f);
+}
+
+static void
+dd_dump_flush_resource(struct dd_context *dctx, struct pipe_resource *res,
+                       FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP(resource, res);
+}
+
+static void
+dd_dump_clear(struct dd_context *dctx, struct call_clear *info, FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(uint, info, buffers);
+   DUMP_M(color_union, info, color);
+   DUMP_M(double, info, depth);
+   DUMP_M(hex, info, stencil);
+}
+
+static void
+dd_dump_clear_buffer(struct dd_context *dctx, struct call_clear_buffer *info,
+                     FILE *f)
+{
+   int i;
+   const char *value = (const char*)info->clear_value;
+
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, res);
+   DUMP_M(uint, info, offset);
+   DUMP_M(uint, info, size);
+   DUMP_M(uint, info, clear_value_size);
+
+   fprintf(f, "  clear_value:");
+   for (i = 0; i < info->clear_value_size; i++)
+      fprintf(f, " %02x", value[i]);
+   fprintf(f, "\n");
+}
+
+static void
+dd_dump_clear_render_target(struct dd_context *dctx, FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   /* TODO */
+}
+
+static void
+dd_dump_clear_depth_stencil(struct dd_context *dctx, FILE *f)
+{
+   fprintf(f, "%s:\n", __func__+8);
+   /* TODO */
+}
+
+static void
+dd_dump_driver_state(struct dd_context *dctx, FILE *f, unsigned flags)
+{
+   if (dctx->pipe->dump_debug_state) {
+	   fprintf(f,"\n\n**************************************************"
+		     "***************************\n");
+	   fprintf(f, "Driver-specific state:\n\n");
+	   dctx->pipe->dump_debug_state(dctx->pipe, f, flags);
+   }
+}
+
+static void
+dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags)
+{
+   FILE *f = dd_get_file_stream(dctx);
+
+   if (!f)
+      return;
+
+   switch (call->type) {
+   case CALL_DRAW_VBO:
+      dd_dump_draw_vbo(dctx, &call->info.draw_vbo, f);
+      break;
+   case CALL_RESOURCE_COPY_REGION:
+      dd_dump_resource_copy_region(dctx, &call->info.resource_copy_region, f);
+      break;
+   case CALL_BLIT:
+      dd_dump_blit(dctx, &call->info.blit, f);
+      break;
+   case CALL_FLUSH_RESOURCE:
+      dd_dump_flush_resource(dctx, call->info.flush_resource, f);
+      break;
+   case CALL_CLEAR:
+      dd_dump_clear(dctx, &call->info.clear, f);
+      break;
+   case CALL_CLEAR_BUFFER:
+      dd_dump_clear_buffer(dctx, &call->info.clear_buffer, f);
+      break;
+   case CALL_CLEAR_RENDER_TARGET:
+      dd_dump_clear_render_target(dctx, f);
+      break;
+   case CALL_CLEAR_DEPTH_STENCIL:
+      dd_dump_clear_depth_stencil(dctx, f);
+   }
+
+   dd_dump_driver_state(dctx, f, flags);
+   dd_close_file_stream(f);
+}
+
+static void
+dd_kill_process(void)
+{
+   sync();
+   fprintf(stderr, "dd: Aborting the process...\n");
+   fflush(stdout);
+   fflush(stderr);
+   abort();
+}
+
+static bool
+dd_flush_and_check_hang(struct dd_context *dctx,
+                        struct pipe_fence_handle **flush_fence,
+                        unsigned flush_flags)
+{
+   struct pipe_fence_handle *fence = NULL;
+   struct pipe_context *pipe = dctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   uint64_t timeout_ms = dd_screen(dctx->base.screen)->timeout_ms;
+   bool idle;
+
+   assert(timeout_ms > 0);
+
+   pipe->flush(pipe, &fence, flush_flags);
+   if (flush_fence)
+      screen->fence_reference(screen, flush_fence, fence);
+   if (!fence)
+      return false;
+
+   idle = screen->fence_finish(screen, fence, timeout_ms * 1000000);
+   screen->fence_reference(screen, &fence, NULL);
+   if (!idle)
+      fprintf(stderr, "dd: GPU hang detected!\n");
+   return !idle;
+}
+
+static void
+dd_flush_and_handle_hang(struct dd_context *dctx,
+                         struct pipe_fence_handle **fence, unsigned flags,
+                         const char *cause)
+{
+   if (dd_flush_and_check_hang(dctx, fence, flags)) {
+      FILE *f = dd_get_file_stream(dctx);
+
+      if (f) {
+         fprintf(f, "dd: %s.\n", cause);
+         dd_dump_driver_state(dctx, f, PIPE_DEBUG_DEVICE_IS_HUNG);
+         dd_close_file_stream(f);
+      }
+
+      /* Terminate the process to prevent future hangs. */
+      dd_kill_process();
+   }
+}
+
+static void
+dd_context_flush(struct pipe_context *_pipe,
+                 struct pipe_fence_handle **fence, unsigned flags)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   switch (dd_screen(dctx->base.screen)->mode) {
+   case DD_DETECT_HANGS:
+      dd_flush_and_handle_hang(dctx, fence, flags,
+                               "GPU hang detected in pipe->flush()");
+      break;
+   case DD_DUMP_ALL_CALLS:
+      pipe->flush(pipe, fence, flags);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+static void
+dd_before_draw(struct dd_context *dctx)
+{
+   if (dd_screen(dctx->base.screen)->mode == DD_DETECT_HANGS &&
+       !dd_screen(dctx->base.screen)->no_flush)
+      dd_flush_and_handle_hang(dctx, NULL, 0,
+                               "GPU hang most likely caused by internal "
+                               "driver commands");
+}
+
+static void
+dd_after_draw(struct dd_context *dctx, struct dd_call *call)
+{
+   switch (dd_screen(dctx->base.screen)->mode) {
+   case DD_DETECT_HANGS:
+      if (!dd_screen(dctx->base.screen)->no_flush &&
+          dd_flush_and_check_hang(dctx, NULL, 0)) {
+         dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+
+         /* Terminate the process to prevent future hangs. */
+         dd_kill_process();
+      }
+      break;
+   case DD_DUMP_ALL_CALLS:
+      dd_dump_call(dctx, call, 0);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+static void
+dd_context_draw_vbo(struct pipe_context *_pipe,
+                    const struct pipe_draw_info *info)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_DRAW_VBO;
+   call.info.draw_vbo = *info;
+
+   dd_before_draw(dctx);
+   pipe->draw_vbo(pipe, info);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_resource_copy_region(struct pipe_context *_pipe,
+                                struct pipe_resource *dst, unsigned dst_level,
+                                unsigned dstx, unsigned dsty, unsigned dstz,
+                                struct pipe_resource *src, unsigned src_level,
+                                const struct pipe_box *src_box)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_RESOURCE_COPY_REGION;
+   call.info.resource_copy_region.dst = dst;
+   call.info.resource_copy_region.dst_level = dst_level;
+   call.info.resource_copy_region.dstx = dstx;
+   call.info.resource_copy_region.dsty = dsty;
+   call.info.resource_copy_region.dstz = dstz;
+   call.info.resource_copy_region.src = src;
+   call.info.resource_copy_region.src_level = src_level;
+   call.info.resource_copy_region.src_box = src_box;
+
+   dd_before_draw(dctx);
+   pipe->resource_copy_region(pipe,
+                              dst, dst_level, dstx, dsty, dstz,
+                              src, src_level, src_box);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_BLIT;
+   call.info.blit = *info;
+
+   dd_before_draw(dctx);
+   pipe->blit(pipe, info);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_flush_resource(struct pipe_context *_pipe,
+                          struct pipe_resource *resource)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_FLUSH_RESOURCE;
+   call.info.flush_resource = resource;
+
+   dd_before_draw(dctx);
+   pipe->flush_resource(pipe, resource);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_clear(struct pipe_context *_pipe, unsigned buffers,
+                 const union pipe_color_union *color, double depth,
+                 unsigned stencil)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_CLEAR;
+   call.info.clear.buffers = buffers;
+   call.info.clear.color = color;
+   call.info.clear.depth = depth;
+   call.info.clear.stencil = stencil;
+
+   dd_before_draw(dctx);
+   pipe->clear(pipe, buffers, color, depth, stencil);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_clear_render_target(struct pipe_context *_pipe,
+                               struct pipe_surface *dst,
+                               const union pipe_color_union *color,
+                               unsigned dstx, unsigned dsty,
+                               unsigned width, unsigned height)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_CLEAR_RENDER_TARGET;
+
+   dd_before_draw(dctx);
+   pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_clear_depth_stencil(struct pipe_context *_pipe,
+                               struct pipe_surface *dst, unsigned clear_flags,
+                               double depth, unsigned stencil, unsigned dstx,
+                               unsigned dsty, unsigned width, unsigned height)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_CLEAR_DEPTH_STENCIL;
+
+   dd_before_draw(dctx);
+   pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil,
+                             dstx, dsty, width, height);
+   dd_after_draw(dctx, &call);
+}
+
+static void
+dd_context_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
+                        unsigned offset, unsigned size,
+                        const void *clear_value, int clear_value_size)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+   struct dd_call call;
+
+   call.type = CALL_CLEAR_BUFFER;
+   call.info.clear_buffer.res = res;
+   call.info.clear_buffer.offset = offset;
+   call.info.clear_buffer.size = size;
+   call.info.clear_buffer.clear_value = clear_value;
+   call.info.clear_buffer.clear_value_size = clear_value_size;
+
+   dd_before_draw(dctx);
+   pipe->clear_buffer(pipe, res, offset, size, clear_value, clear_value_size);
+   dd_after_draw(dctx, &call);
+}
+
+void
+dd_init_draw_functions(struct dd_context *dctx)
+{
+   CTX_INIT(flush);
+   CTX_INIT(draw_vbo);
+   CTX_INIT(resource_copy_region);
+   CTX_INIT(blit);
+   CTX_INIT(clear);
+   CTX_INIT(clear_render_target);
+   CTX_INIT(clear_depth_stencil);
+   CTX_INIT(clear_buffer);
+   CTX_INIT(flush_resource);
+   /* launch_grid */
+}
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -0,0 +1,139 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DD_H_
+#define DD_H_
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_screen.h"
+#include "dd_util.h"
+
+enum dd_mode {
+   DD_DETECT_HANGS,
+   DD_DUMP_ALL_CALLS
+};
+
+struct dd_screen
+{
+   struct pipe_screen base;
+   struct pipe_screen *screen;
+   unsigned timeout_ms;
+   enum dd_mode mode;
+   bool no_flush;
+};
+
+struct dd_query
+{
+   unsigned type;
+   struct pipe_query *query;
+};
+
+struct dd_state
+{
+   void *cso;
+
+   union {
+      struct pipe_blend_state blend;
+      struct pipe_depth_stencil_alpha_state dsa;
+      struct pipe_rasterizer_state rs;
+      struct pipe_sampler_state sampler;
+      struct {
+         struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+         unsigned count;
+      } velems;
+      struct pipe_shader_state shader;
+   } state;
+};
+
+struct dd_context
+{
+   struct pipe_context base;
+   struct pipe_context *pipe;
+
+   struct {
+      struct dd_query *query;
+      bool condition;
+      unsigned mode;
+   } render_cond;
+
+   struct pipe_index_buffer index_buffer;
+   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+
+   unsigned num_so_targets;
+   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
+   unsigned so_offsets[PIPE_MAX_SO_BUFFERS];
+
+   struct dd_state *shaders[PIPE_SHADER_TYPES];
+   struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+   struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+   struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
+
+   struct dd_state *velems;
+   struct dd_state *rs;
+   struct dd_state *dsa;
+   struct dd_state *blend;
+
+   struct pipe_blend_color blend_color;
+   struct pipe_stencil_ref stencil_ref;
+   unsigned sample_mask;
+   unsigned min_samples;
+   struct pipe_clip_state clip_state;
+   struct pipe_framebuffer_state framebuffer_state;
+   struct pipe_poly_stipple polygon_stipple;
+   struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
+   struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
+   float tess_default_levels[6];
+};
+
+
+struct pipe_context *
+dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
+
+void
+dd_init_draw_functions(struct dd_context *dctx);
+
+
+static inline struct dd_context *
+dd_context(struct pipe_context *pipe)
+{
+   return (struct dd_context *)pipe;
+}
+
+static inline struct dd_screen *
+dd_screen(struct pipe_screen *screen)
+{
+   return (struct dd_screen*)screen;
+}
+
+
+#define CTX_INIT(_member) \
+   dctx->base._member = dctx->pipe->_member ? dd_context_##_member : NULL
+
+#endif /* DD_H_ */
--- a/src/gallium/drivers/radeonsi/si_commands.c
+++ b/src/gallium/drivers/radeonsi/si_commands.c
@@ -1,5 +1,8 @@
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -20,17 +23,14 @@
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
- * Authors:
- *      Christian König <christian.koenig@amd.com>
- */
+ **************************************************************************/

-#include "sid.h"
-#include "si_pipe.h"
+#ifndef DD_PUBLIC_H_
+#define DD_PUBLIC_H_

-void si_cmd_context_control(struct si_pm4_state *pm4)
-{
-	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
-	si_pm4_cmd_add(pm4, 0x80000000);
-	si_pm4_cmd_add(pm4, 0x80000000);
-	si_pm4_cmd_end(pm4, false);
-}
+struct pipe_screen;
+
+struct pipe_screen *
+ddebug_screen_create(struct pipe_screen *screen);
+
+#endif /* DD_PUBLIC_H_ */
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -0,0 +1,353 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dd_pipe.h"
+#include "dd_public.h"
+#include "util/u_memory.h"
+#include <stdio.h>
+
+
+static const char *
+dd_screen_get_name(struct pipe_screen *_screen)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_name(screen);
+}
+
+static const char *
+dd_screen_get_vendor(struct pipe_screen *_screen)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_vendor(screen);
+}
+
+static const char *
+dd_screen_get_device_vendor(struct pipe_screen *_screen)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_device_vendor(screen);
+}
+
+static int
+dd_screen_get_param(struct pipe_screen *_screen,
+                    enum pipe_cap param)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_param(screen, param);
+}
+
+static float
+dd_screen_get_paramf(struct pipe_screen *_screen,
+                     enum pipe_capf param)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_paramf(screen, param);
+}
+
+static int
+dd_screen_get_shader_param(struct pipe_screen *_screen, unsigned shader,
+                           enum pipe_shader_cap param)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_shader_param(screen, shader, param);
+}
+
+static uint64_t
+dd_screen_get_timestamp(struct pipe_screen *_screen)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_timestamp(screen);
+}
+
+static struct pipe_context *
+dd_screen_context_create(struct pipe_screen *_screen, void *priv,
+                         unsigned flags)
+{
+   struct dd_screen *dscreen = dd_screen(_screen);
+   struct pipe_screen *screen = dscreen->screen;
+
+   flags |= PIPE_CONTEXT_DEBUG;
+
+   return dd_context_create(dscreen,
+                            screen->context_create(screen, priv, flags));
+}
+
+static boolean
+dd_screen_is_format_supported(struct pipe_screen *_screen,
+                              enum pipe_format format,
+                              enum pipe_texture_target target,
+                              unsigned sample_count,
+                              unsigned tex_usage)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->is_format_supported(screen, format, target, sample_count,
+                                      tex_usage);
+}
+
+static boolean
+dd_screen_can_create_resource(struct pipe_screen *_screen,
+                              const struct pipe_resource *templat)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->can_create_resource(screen, templat);
+}
+
+static void
+dd_screen_flush_frontbuffer(struct pipe_screen *_screen,
+                            struct pipe_resource *resource,
+                            unsigned level, unsigned layer,
+                            void *context_private,
+                            struct pipe_box *sub_box)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   screen->flush_frontbuffer(screen, resource, level, layer, context_private,
+                             sub_box);
+}
+
+static int
+dd_screen_get_driver_query_info(struct pipe_screen *_screen,
+                                unsigned index,
+                                struct pipe_driver_query_info *info)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_driver_query_info(screen, index, info);
+}
+
+static int
+dd_screen_get_driver_query_group_info(struct pipe_screen *_screen,
+                                      unsigned index,
+                                      struct pipe_driver_query_group_info *info)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->get_driver_query_group_info(screen, index, info);
+}
+
+
+/********************************************************************
+ * resource
+ */
+
+static struct pipe_resource *
+dd_screen_resource_create(struct pipe_screen *_screen,
+                          const struct pipe_resource *templat)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+   struct pipe_resource *res = screen->resource_create(screen, templat);
+
+   if (!res)
+      return NULL;
+   res->screen = _screen;
+   return res;
+}
+
+static struct pipe_resource *
+dd_screen_resource_from_handle(struct pipe_screen *_screen,
+                               const struct pipe_resource *templ,
+                               struct winsys_handle *handle)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+   struct pipe_resource *res =
+      screen->resource_from_handle(screen, templ, handle);
+
+   if (!res)
+      return NULL;
+   res->screen = _screen;
+   return res;
+}
+
+static struct pipe_resource *
+dd_screen_resource_from_user_memory(struct pipe_screen *_screen,
+                                    const struct pipe_resource *templ,
+                                    void *user_memory)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+   struct pipe_resource *res =
+      screen->resource_from_user_memory(screen, templ, user_memory);
+
+   if (!res)
+      return NULL;
+   res->screen = _screen;
+   return res;
+}
+
+static void
+dd_screen_resource_destroy(struct pipe_screen *_screen,
+                           struct pipe_resource *res)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   screen->resource_destroy(screen, res);
+}
+
+static boolean
+dd_screen_resource_get_handle(struct pipe_screen *_screen,
+                              struct pipe_resource *resource,
+                              struct winsys_handle *handle)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->resource_get_handle(screen, resource, handle);
+}
+
+
+/********************************************************************
+ * fence
+ */
+
+static void
+dd_screen_fence_reference(struct pipe_screen *_screen,
+                          struct pipe_fence_handle **pdst,
+                          struct pipe_fence_handle *src)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   screen->fence_reference(screen, pdst, src);
+}
+
+static boolean
+dd_screen_fence_finish(struct pipe_screen *_screen,
+                       struct pipe_fence_handle *fence,
+                       uint64_t timeout)
+{
+   struct pipe_screen *screen = dd_screen(_screen)->screen;
+
+   return screen->fence_finish(screen, fence, timeout);
+}
+
+
+/********************************************************************
+ * screen
+ */
+
+static void
+dd_screen_destroy(struct pipe_screen *_screen)
+{
+   struct dd_screen *dscreen = dd_screen(_screen);
+   struct pipe_screen *screen = dscreen->screen;
+
+   screen->destroy(screen);
+   FREE(dscreen);
+}
+
+struct pipe_screen *
+ddebug_screen_create(struct pipe_screen *screen)
+{
+   struct dd_screen *dscreen;
+   const char *option = debug_get_option("GALLIUM_DDEBUG", NULL);
+   bool dump_always = option && !strcmp(option, "always");
+   bool no_flush = option && strstr(option, "noflush");
+   bool help = option && !strcmp(option, "help");
+   unsigned timeout = 0;
+
+   if (help) {
+      puts("Gallium driver debugger");
+      puts("");
+      puts("Usage:");
+      puts("");
+      puts("  GALLIUM_DDEBUG=always");
+      puts("    Dump context and driver information after every draw call into");
+      puts("    $HOME/"DD_DIR"/.");
+      puts("");
+      puts("  GALLIUM_DDEBUG=[timeout in ms] noflush");
+      puts("    Flush and detect a device hang after every draw call based on the given");
+      puts("    fence timeout and dump context and driver information into");
+      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+      puts("    If 'noflush' is specified, only detect hangs in pipe->flush.");
+      puts("");
+      exit(0);
+   }
+
+   if (!option)
+      return screen;
+   if (!dump_always && sscanf(option, "%u", &timeout) != 1)
+      return screen;
+
+   dscreen = CALLOC_STRUCT(dd_screen);
+   if (!dscreen)
+      return NULL;
+
+#define SCR_INIT(_member) \
+   dscreen->base._member = screen->_member ? dd_screen_##_member : NULL
+
+   dscreen->base.destroy = dd_screen_destroy;
+   dscreen->base.get_name = dd_screen_get_name;
+   dscreen->base.get_vendor = dd_screen_get_vendor;
+   dscreen->base.get_device_vendor = dd_screen_get_device_vendor;
+   dscreen->base.get_param = dd_screen_get_param;
+   dscreen->base.get_paramf = dd_screen_get_paramf;
+   dscreen->base.get_shader_param = dd_screen_get_shader_param;
+   /* get_video_param */
+   /* get_compute_param */
+   SCR_INIT(get_timestamp);
+   dscreen->base.context_create = dd_screen_context_create;
+   dscreen->base.is_format_supported = dd_screen_is_format_supported;
+   /* is_video_format_supported */
+   SCR_INIT(can_create_resource);
+   dscreen->base.resource_create = dd_screen_resource_create;
+   dscreen->base.resource_from_handle = dd_screen_resource_from_handle;
+   SCR_INIT(resource_from_user_memory);
+   dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
+   dscreen->base.resource_destroy = dd_screen_resource_destroy;
+   SCR_INIT(flush_frontbuffer);
+   SCR_INIT(fence_reference);
+   SCR_INIT(fence_finish);
+   SCR_INIT(get_driver_query_info);
+   SCR_INIT(get_driver_query_group_info);
+
+#undef SCR_INIT
+
+   dscreen->screen = screen;
+   dscreen->timeout_ms = timeout;
+   dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS;
+   dscreen->no_flush = no_flush;
+
+   switch (dscreen->mode) {
+   case DD_DUMP_ALL_CALLS:
+      fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
+      break;
+   case DD_DETECT_HANGS:
+      fprintf(stderr, "Gallium debugger active. "
+              "The hang detection timout is %i ms.\n", timeout);
+      break;
+   default:
+      assert(0);
+   }
+
+   return &dscreen->base;
+}
--- a/src/gallium/drivers/ddebug/dd_util.h
+++ b/src/gallium/drivers/ddebug/dd_util.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DD_UTIL_H
+#define DD_UTIL_H
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "os/os_process.h"
+#include "util/u_debug.h"
+
+/* name of the directory in home */
+#define DD_DIR "ddebug_dumps"
+
+static inline FILE *
+dd_get_debug_file()
+{
+   static unsigned index;
+   char proc_name[128], dir[256], name[512];
+   FILE *f;
+
+   if (!os_get_process_name(proc_name, sizeof(proc_name))) {
+      fprintf(stderr, "dd: can't get the process name\n");
+      return NULL;
+   }
+
+   snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
+
+   if (mkdir(dir, 0774) && errno != EEXIST) {
+      fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
+      return NULL;
+   }
+
+   snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, getpid(), index++);
+   f = fopen(name, "w");
+   if (!f) {
+      fprintf(stderr, "dd: can't open file %s\n", name);
+      return NULL;
+   }
+
+   return f;
+}
+
+#endif /* DD_UTIL_H */
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -86,7 +86,7 @@ static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
 };

 struct pipe_context *
-fd2_context_create(struct pipe_screen *pscreen, void *priv)
+fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.h
@@ -47,6 +47,6 @@ fd2_context(struct fd_context *ctx)
 }

 struct pipe_context *
-fd2_context_create(struct pipe_screen *pscreen, void *priv);
+fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);

 #endif /* FD2_CONTEXT_H_ */
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -280,6 +280,8 @@ enum a3xx_rb_blend_opcode {
 enum a3xx_intp_mode {
 	SMOOTH = 0,
 	FLAT = 1,
+	ZERO = 2,
+	ONE = 3,
 };

 enum a3xx_repl_mode {
@@ -680,9 +682,16 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
 #define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE		0x00080000
 #define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE			0x00100000
 #define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE		0x00200000
+#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z			0x00400000
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD				0x00800000
 #define A3XX_GRAS_CL_CLIP_CNTL_WCOORD				0x01000000
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE			0x02000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK	0x1c000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT	26
+static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK;
+}

 #define REG_A3XX_GRAS_CL_GB_CLIP_ADJ				0x00002044
 #define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK			0x000003ff
@@ -773,7 +782,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
 #define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT		0
 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
 {
-	return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+	return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
 }

 #define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET			0x0000206d
@@ -894,6 +903,9 @@ static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val)
 #define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE		0x00010000

 #define REG_A3XX_RB_RENDER_CONTROL				0x000020c1
+#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE		0x00000001
+#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE			0x00000002
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE		0x00000004
 #define A3XX_RB_RENDER_CONTROL_FACENESS				0x00000008
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK			0x00000ff0
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT			4
@@ -907,6 +919,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
 #define A3XX_RB_RENDER_CONTROL_YCOORD				0x00008000
 #define A3XX_RB_RENDER_CONTROL_ZCOORD				0x00010000
 #define A3XX_RB_RENDER_CONTROL_WCOORD				0x00020000
+#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE			0x00080000
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE		0x00100000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST			0x00400000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK		0x07000000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT		24
@@ -914,6 +928,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compar
 {
 	return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
 }
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE		0x40000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE			0x80000000

 #define REG_A3XX_RB_MSAA_CONTROL				0x000020c2
 #define A3XX_RB_MSAA_CONTROL_DISABLE				0x00000400
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
@@ -28,6 +28,7 @@

 #include "pipe/p_state.h"
 #include "util/u_blend.h"
+#include "util/u_dual_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"

@@ -131,5 +132,8 @@ fd3_blend_state_create(struct pipe_context *pctx,
 			so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
 	}

+	if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
+		so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
+
 	return so;
 }
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
@@ -36,6 +36,7 @@

 struct fd3_blend_stateobj {
 	struct pipe_blend_state base;
+	uint32_t rb_render_control;
 	struct {
 		/* Blend control bits for color if there is an alpha channel */
 		uint32_t blend_control_rgb;
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -98,7 +98,7 @@ static const uint8_t primtypes[PIPE_PRIM_MAX] = {
 };

 struct pipe_context *
-fd3_context_create(struct pipe_screen *pscreen, void *priv)
+fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -73,22 +73,6 @@ struct fd3_context {
 	 */
 	struct fd_vertex_state blit_vbuf_state;

-
-	/*
-	 * Border color layout *appears* to be as arrays of 0x40 byte
-	 * elements, with frag shader elements starting at (16 x 0x40).
-	 * But at some point I should probably experiment more with
-	 * samplers in vertex shaders to be sure.  Unclear about why
-	 * there is this offset when there are separate VS and FS base
-	 * addr regs.
-	 *
-	 * The first 8 bytes of each entry are the requested border
-	 * color in fp16.  Unclear about the rest.. could be used for
-	 * other formats, or could simply be for aligning the pitch
-	 * to 32 pixels.
-	 */
-#define BORDERCOLOR_SIZE 0x40
-
 	struct u_upload_mgr *border_color_uploader;
 	struct pipe_resource *border_color_buf;

@@ -119,6 +103,6 @@ fd3_context(struct fd_context *ctx)
 }

 struct pipe_context *
-fd3_context_create(struct pipe_screen *pscreen, void *priv);
+fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);

 #endif /* FD3_CONTEXT_H_ */
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -149,6 +149,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			&fd3_ctx->border_color_buf,
 			&ptr);

+	fd_setup_border_colors(tex, ptr, tex_off[sb]);
+
 	if (tex->num_samplers > 0) {
 		/* output sampler state: */
 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
@@ -163,57 +165,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
 					fd3_sampler_stateobj(tex->samplers[i]) :
 					&dummy_sampler;
-			uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
-					(BORDERCOLOR_SIZE * tex_off[sb]) +
-					(BORDERCOLOR_SIZE * i));
-			uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
-
-			/*
-			 * XXX HACK ALERT XXX
-			 *
-			 * The border colors need to be swizzled in a particular
-			 * format-dependent order. Even though samplers don't know about
-			 * formats, we can assume that with a GL state tracker, there's a
-			 * 1:1 correspondence between sampler and texture. Take advantage
-			 * of that knowledge.
-			 */
-			if (i < tex->num_textures && tex->textures[i]) {
-				const struct util_format_description *desc =
-					util_format_description(tex->textures[i]->format);
-				for (j = 0; j < 4; j++) {
-					if (desc->swizzle[j] >= 4)
-						continue;
-
-					const struct util_format_channel_description *chan =
-						&desc->channel[desc->swizzle[j]];
-					int size = chan->size;
-
-					/* The Z16 texture format we use seems to look in the
-					 * 32-bit border color slots
-					 */
-					if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
-						size = 32;
-
-					/* Formats like R11G11B10 or RGB9_E5 don't specify
-					 * per-channel sizes properly.
-					 */
-					if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
-						size = 16;
-
-					if (chan->pure_integer && size > 16)
-						bcolor32[desc->swizzle[j] + 4] =
-							sampler->base.border_color.i[j];
-					else if (size > 16)
-						bcolor32[desc->swizzle[j]] =
-							fui(sampler->base.border_color.f[j]);
-					else if (chan->pure_integer)
-						bcolor[desc->swizzle[j] + 8] =
-							sampler->base.border_color.i[j];
-					else
-						bcolor[desc->swizzle[j]] =
-							util_float_to_half(sampler->base.border_color.f[j]);
-				}
-			}

 			OUT_RING(ring, sampler->texsamp0);
 			OUT_RING(ring, sampler->texsamp1);
@@ -400,15 +351,27 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 	unsigned vtxcnt_regid = regid(63, 0);

 	for (i = 0; i < vp->inputs_count; i++) {
-		uint8_t semantic = sem2name(vp->inputs[i].semantic);
-		if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
-			vertex_regid = vp->inputs[i].regid;
-		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
-			instance_regid = vp->inputs[i].regid;
-		else if (semantic == IR3_SEMANTIC_VTXCNT)
-			vtxcnt_regid = vp->inputs[i].regid;
-		else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+		if (vp->inputs[i].sysval) {
+			switch(vp->inputs[i].slot) {
+			case SYSTEM_VALUE_BASE_VERTEX:
+				/* handled elsewhere */
+				break;
+			case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+				vertex_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_INSTANCE_ID:
+				instance_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_VERTEX_CNT:
+				vtxcnt_regid = vp->inputs[i].regid;
+				break;
+			default:
+				unreachable("invalid system value");
+				break;
+			}
+		} else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
 			last = i;
+		}
 	}

 	/* hw doesn't like to be configured for zero vbo's, it seems: */
@@ -419,7 +382,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 		return;

 	for (i = 0, j = 0; i <= last; i++) {
-		assert(sem2name(vp->inputs[i].semantic) == 0);
+		assert(!vp->inputs[i].sysval);
 		if (vp->inputs[i].compmask) {
 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
 			const struct pipe_vertex_buffer *vb =
@@ -492,8 +455,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
 	}

-	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
-		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
+	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) &&
+		!emit->key.binning_pass) {
+		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
+			fd3_blend_stateobj(ctx->blend)->rb_render_control;

 		val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
 		val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
@@ -563,10 +528,30 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
+		/* TODO only use if prog doesn't use clipvertex/clipdist */
+		val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+				MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}

+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+		uint32_t planes = ctx->rasterizer->clip_plane_enable;
+		int count = 0;
+
+		while (planes && count < 6) {
+			int i = ffs(planes) - 1;
+
+			planes &= ~(1U << i);
+			fd_wfi(ctx, ring);
+			OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
+		}
+	}
+
 	/* NOTE: since primitive_restart is not actually part of any
 	 * state object, we need to make sure that we always emit
 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
@@ -620,9 +605,13 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
 	}

-	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
 		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-		fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+		int nr_cbufs = pfb->nr_cbufs;
+		if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
+			A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
+			nr_cbufs++;
+		fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
 	}

 	/* TODO we should not need this or fd_wfi() before emit_constants():
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
 	case PIPE_FORMAT_R16G16_FLOAT:
 	case PIPE_FORMAT_R11G11B10_FLOAT:
 		return RB_R16G16B16A16_FLOAT;
+	case PIPE_FORMAT_L8_UNORM:
+		return RB_R8G8B8A8_UNORM;
 	default:
 		return fd3_pipe2color(format);
 	}
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -194,24 +194,17 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 	/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
 	constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;

-	pos_regid = ir3_find_output_regid(vp,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	posz_regid = ir3_find_output_regid(fp,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	psize_regid = ir3_find_output_regid(vp,
-		ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+	pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
+	posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+	psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
 	if (fp->color0_mrt) {
 		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
-			ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+			ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
 	} else {
-		for (i = 0; i < fp->outputs_count; i++) {
-			ir3_semantic sem = fp->outputs[i].semantic;
-			unsigned idx = sem2idx(sem);
-			if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
-				continue;
-			debug_assert(idx < ARRAY_SIZE(color_regid));
-			color_regid[idx] = fp->outputs[i].regid;
-		}
+		color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
+		color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
+		color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
+		color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
 	}

 	/* adjust regids for alpha output formats. there is no alpha render
@@ -280,14 +273,14 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,

 		j = ir3_next_varying(fp, j);
 		if (j < fp->inputs_count) {
-			k = ir3_find_output(vp, fp->inputs[j].semantic);
+			k = ir3_find_output(vp, fp->inputs[j].slot);
 			reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
 			reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
 		}

 		j = ir3_next_varying(fp, j);
 		if (j < fp->inputs_count) {
-			k = ir3_find_output(vp, fp->inputs[j].semantic);
+			k = ir3_find_output(vp, fp->inputs[j].slot);
 			reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
 			reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
 		}
@@ -394,7 +387,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,

 		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
 		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
-			uint32_t interp = fp->inputs[j].interpolate;

 			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
 			 * instead.. rather than -8 everywhere else..
@@ -406,8 +398,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 			 */
 			debug_assert((inloc % 4) == 0);

-			if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
-					((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+			if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+					(fp->inputs[j].rasterflat && emit->rasterflat)) {
 				uint32_t loc = inloc;
 				for (i = 0; i < 4; i++, loc++) {
 					vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
@@ -415,14 +407,20 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 				}
 			}

-			/* Replace the .xy coordinates with S/T from the point sprite. Set
-			 * interpolation bits for .zw such that they become .01
-			 */
-			if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
-				vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-					<< ((inloc % 16) * 2);
-				vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-				vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+			gl_varying_slot slot = fp->inputs[j].slot;
+
+			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+			if (slot >= VARYING_SLOT_VAR0) {
+				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+				/* Replace the .xy coordinates with S/T from the point sprite. Set
+				 * interpolation bits for .zw such that they become .01
+				 */
+				if (emit->sprite_coord_enable & texmask) {
+					vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+							<< ((inloc % 16) * 2);
+					vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+					vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+				}
 			}
 		}

--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
@@ -65,7 +65,8 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
 	if (cso->multisample)
 		TODO
 */
-	so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
+	so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER /* ??? */ |
+		COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
 	so->gras_su_point_minmax =
 			A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
 			A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -162,10 +162,13 @@ enum a4xx_tex_fmt {
 	TFMT4_8_UNORM = 4,
 	TFMT4_8_8_UNORM = 14,
 	TFMT4_8_8_8_8_UNORM = 28,
+	TFMT4_8_SNORM = 5,
 	TFMT4_8_8_SNORM = 15,
 	TFMT4_8_8_8_8_SNORM = 29,
+	TFMT4_8_UINT = 6,
 	TFMT4_8_8_UINT = 16,
 	TFMT4_8_8_8_8_UINT = 30,
+	TFMT4_8_SINT = 7,
 	TFMT4_8_8_SINT = 17,
 	TFMT4_8_8_8_8_SINT = 31,
 	TFMT4_16_UINT = 21,
@@ -246,7 +249,8 @@ enum a4xx_tex_clamp {
 	A4XX_TEX_REPEAT = 0,
 	A4XX_TEX_CLAMP_TO_EDGE = 1,
 	A4XX_TEX_MIRROR_REPEAT = 2,
-	A4XX_TEX_CLAMP_NONE = 3,
+	A4XX_TEX_CLAMP_TO_BORDER = 3,
+	A4XX_TEX_MIRROR_CLAMP = 4,
 };

 enum a4xx_tex_aniso {
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -55,6 +55,8 @@ fd4_context_destroy(struct pipe_context *pctx)
 	pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL);
 	pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL);

+	u_upload_destroy(fd4_ctx->border_color_uploader);
+
 	fd_context_destroy(pctx);
 }

@@ -96,7 +98,7 @@ static const uint8_t primtypes[PIPE_PRIM_MAX] = {
 };

 struct pipe_context *
-fd4_context_create(struct pipe_screen *pscreen, void *priv)
+fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context);
@@ -169,5 +171,8 @@ fd4_context_create(struct pipe_screen *pscreen, void *priv)

 	fd4_query_context_init(pctx);

+	fd4_ctx->border_color_uploader = u_upload_create(pctx, 4096,
+			2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
+
 	return pctx;
 }
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -29,6 +29,8 @@
 #ifndef FD4_CONTEXT_H_
 #define FD4_CONTEXT_H_

+#include "util/u_upload_mgr.h"
+
 #include "freedreno_drmif.h"

 #include "freedreno_context.h"
@@ -70,6 +72,9 @@ struct fd4_context {
 	 */
 	struct fd_vertex_state blit_vbuf_state;

+	struct u_upload_mgr *border_color_uploader;
+	struct pipe_resource *border_color_buf;
+
 	/* if *any* of bits are set in {v,f}saturate_{s,t,r} */
 	bool vsaturate, fsaturate;

@@ -97,6 +102,6 @@ fd4_context(struct fd_context *ctx)
 }

 struct pipe_context *
-fd4_context_create(struct pipe_screen *pscreen, void *priv);
+fd4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);

 #endif /* FD4_CONTEXT_H_ */
--- a/Show More
+++ b/Show More