i965/gen8: Remove gen<8 checks in gen8 code

Some assertions in gen8_surface_state.c checked for gen < 8. Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
i965/gen9: Enable rep clears on gen9
2015-10-09 14:24:12 -07:00 · 2015-10-09 14:24:12 -07:00 · 2015-10-09 14:24:12 -07:00 · 2015-10-09 23:12:14 +02:00 · 2015-10-09 22:02:19 +02:00 · 2015-10-09 22:02:18 +02:00
906 changed files with 61019 additions and 28936 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -42,6 +42,7 @@ LOCAL_CFLAGS += \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

 LOCAL_CFLAGS += \
+	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
 	-DHAVE___BUILTIN_FFSLL \
@@ -70,7 +71,7 @@ endif

 ifeq ($(MESA_ENABLE_LLVM),true)
 LOCAL_CFLAGS += \
-	-DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
+	-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
 	-D__STDC_CONSTANT_MACROS \
 	-D__STDC_FORMAT_MACROS \
 	-D__STDC_LIMIT_MACROS
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.0-devel
+11.1.0-devel
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.62
-LIBDRM_FREEDRENO_REQUIRED=2.4.64
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
@@ -533,15 +533,32 @@ AM_CONDITIONAL(HAVE_COMPAT_SYMLINKS, test "x$HAVE_COMPAT_SYMLINKS" = xyes)
 dnl
 dnl library names
 dnl
+dnl Unfortunately we need to do a few things that libtool can't help us with,
+dnl so we need some knowledge of shared library filenames:
+dnl
+dnl LIB_EXT is the extension used when creating symlinks for alternate
+dnl filenames for a shared library which will be dynamically loaded
+dnl
+dnl IMP_LIB_EXT is the extension used when checking for the presence of
+dnl the file for a shared library we wish to link with
+dnl
 case "$host_os" in
 darwin* )
-    LIB_EXT='dylib' ;;
+    LIB_EXT='dylib'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 cygwin* )
-    LIB_EXT='dll' ;;
+    LIB_EXT='dll'
+    IMP_LIB_EXT='dll.a'
+    ;;
 aix* )
-    LIB_EXT='a' ;;
+    LIB_EXT='a'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 * )
-    LIB_EXT='so' ;;
+    LIB_EXT='so'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 esac

 AC_SUBST([LIB_EXT])
@@ -1110,6 +1127,11 @@ AC_MSG_RESULT([$with_sha1])
 AC_SUBST(SHA1_LIBS)
 AC_SUBST(SHA1_CFLAGS)

+# Enable a define for SHA1
+if test "x$with_sha1" != "x"; then
+	DEFINES="$DEFINES -DHAVE_SHA1"
+fi
+
 # Allow user to configure out the shader-cache feature
 AC_ARG_ENABLE([shader-cache],
    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
@@ -1289,6 +1311,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
 AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
      [DEFINES="${DEFINES} -DGLX_USE_TLS"])

+dnl Read-only text section on x86 hardened platforms
+AC_ARG_ENABLE([glx-read-only-text],
+    [AS_HELP_STRING([--enable-glx-read-only-text],
+        [Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
+    [enable_glx_read_only_text="$enableval"],
+    [enable_glx_read_only_text=no])
+if test "x$enable_glx_read_only_text" = xyes; then
+    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
+fi
+
 dnl
 dnl More DRI setup
 dnl
@@ -2051,7 +2083,7 @@ radeon_llvm_check() {
    if test "x$enable_gallium_llvm" != "xyes"; then
        AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
    fi
-    llvm_check_version_for "3" "4" "2" $1 
+    llvm_check_version_for "3" "5" "0" $1
    if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
        AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
    fi
@@ -2139,11 +2171,8 @@ if test -n "$with_gallium_drivers"; then
            gallium_require_drm "vc4"
            gallium_require_drm_loader

-            case "$host_cpu" in
-                i?86 | x86_64 | amd64)
-                USE_VC4_SIMULATOR=yes
-                ;;
-            esac
+            PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+                              [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
            ;;
        *)
            AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@ -2163,10 +2192,14 @@ if test "x$MESA_LLVM" != x0; then

    LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"

+    dnl llvm-config may not give the right answer when llvm is built as a
+    dnl single shared library, so we must work the library name out for
+    dnl ourselves.
+    dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
    if test "x$enable_llvm_shared_libs" = xyes; then
        dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
        LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])

        if test "x$llvm_have_one_so" = xyes; then
            dnl LLVM was built using auto*, so there is only one shared object.
@@ -2174,7 +2207,7 @@ if test "x$MESA_LLVM" != x0; then
        else
            dnl If LLVM was built with CMake, there will be one shared object per
            dnl component.
-            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
                    [AC_MSG_ERROR([Could not find llvm shared libraries:
 	Please make sure you have built llvm with the --enable-shared option
 	and that your llvm libraries are installed in $LLVM_LIBDIR
@@ -2317,6 +2350,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/auxiliary/Makefile
 		src/gallium/auxiliary/pipe-loader/Makefile
 		src/gallium/drivers/freedreno/Makefile
+		src/gallium/drivers/ddebug/Makefile
 		src/gallium/drivers/i915/Makefile
 		src/gallium/drivers/ilo/Makefile
 		src/gallium/drivers/llvmpipe/Makefile
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -109,14 +109,14 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi
  - Enhanced per-sample shading                        DONE (r600)
  - Interpolation functions                            DONE (r600)
  - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (r600, llvmpipe, softpipe)
  GL_ARB_sample_shading                                DONE (i965, nv50, r600)
  GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_tessellation_shader                           DONE ()
  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, r600, llvmpipe, softpipe)
  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_texture_gather                                DONE (i965, nv50, r600, llvmpipe, softpipe)
-  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600)
+  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600, softpipe)
  GL_ARB_transform_feedback2                           DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_transform_feedback3                           DONE (i965, nv50, r600, llvmpipe, softpipe)

@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
  GL_ARB_get_program_binary                            DONE (0 binary formats)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
+  GL_ARB_vertex_attrib_64bit                           DONE (r600, llvmpipe, softpipe)
  GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)


@@ -164,7 +164,7 @@ GL 4.3, GLSL 4.30:
  GL_ARB_program_interface_query                       DONE (all drivers)
  GL_ARB_robust_buffer_access_behavior                 not started
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
  GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
@@ -178,7 +178,13 @@ GL 4.4, GLSL 4.40:
  GL_MAX_VERTEX_ATTRIB_STRIDE                          DONE (all drivers)
  GL_ARB_buffer_storage                                DONE (i965, nv50, nvc0, r600, radeonsi)
  GL_ARB_clear_texture                                 DONE (i965) (gallium - in progress, VMware)
-  GL_ARB_enhanced_layouts                              not started
+  GL_ARB_enhanced_layouts                              in progress (Timothy)
+  - compile-time constant expressions                  in progress
+  - explicit byte offsets for blocks                   in progress
+  - forced alignment within blocks                     in progress
+  - specified vec4-slot component numbers              in progress
+  - specified transform/feedback layout                in progress
+  - input/output block locations                       in progress
  GL_ARB_multi_bind                                    DONE (all drivers)
  GL_ARB_query_buffer_object                           not started
  GL_ARB_texture_mirror_clamp_to_edge                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -194,9 +200,9 @@ GL 4.5, GLSL 4.50:
  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
  GL_ARB_direct_state_access                           DONE (all drivers)
  GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  not started
-  GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
-  GL_KHR_context_flush_control                         DONE (all - but needs GLX/EXT extension to be useful)
+  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_ARB_texture_barrier                               DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
  GL_KHR_robust_buffer_access_behavior                 not started
  GL_KHR_robustness                                    90% done (the ARB variant)
  GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)
@@ -212,7 +218,7 @@ GLES3.1, GLSL ES 3.1
  GL_ARB_shader_atomic_counters                        DONE (i965)
  GL_ARB_shader_image_load_store                       DONE (i965)
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_shading_language_packing                      DONE (all drivers)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -223,10 +229,35 @@ GLES3.1, GLSL ES 3.1
  GS5 Packing/bitfield/conversion functions            DONE (i965, nvc0, r600, radeonsi)
  GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)

-  Additional functions not covered above:
-      glMemoryBarrierByRegion
-      glGetTexLevelParameter[fi]v - needs updates to restrict to GLES enums
-      glGetBooleani_v - needs updates to restrict to GLES enums
+  Additional functionality not covered above:
+      glMemoryBarrierByRegion                          DONE
+      glGetTexLevelParameter[fi]v - needs updates      DONE
+      glGetBooleani_v - restrict to GLES enums
+      gl_HelperInvocation support
+
+GLES3.2, GLSL ES 3.2
+  GL_EXT_color_buffer_float                            DONE (all drivers)
+  GL_KHR_blend_equation_advanced                       not started
+  GL_KHR_debug                                         DONE (all drivers)
+  GL_KHR_robustness                                    90% done (the ARB variant)
+  GL_KHR_texture_compression_astc_ldr                  DONE (i965/gen9+)
+  GL_OES_copy_image                                    not started (based on GL_ARB_copy_image, which is done for some drivers)
+  GL_OES_draw_buffers_indexed                          not started
+  GL_OES_draw_elements_base_vertex                     not started (based on GL_ARB_draw_elements_base_vertex, which is done for all drivers)
+  GL_OES_geometry_shader                               not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
+  GL_OES_gpu_shader5                                   not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
+  GL_OES_primitive_bounding_box                        not started
+  GL_OES_sample_shading                                not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_sample_variables                              not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_shader_image_atomic                           not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
+  GL_OES_shader_io_blocks                              not started (based on parts of GLSL 1.50, which is done)
+  GL_OES_shader_multisample_interpolation              not started (based on parts of GL_ARB_gpu_shader5, which is done)
+  GL_OES_tessellation_shader                           not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
+  GL_OES_texture_border_clamp                          not started (based on GL_ARB_texture_border_clamp, which is done)
+  GL_OES_texture_buffer                                not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
+  GL_OES_texture_cube_map_array                        not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
+  GL_OES_texture_stencil8                              not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
+  GL_OES_texture_storage_multisample_2d_array          DONE (all drivers that support GL_ARB_texture_multisample)

 More info about these features and the work involved can be found at
 http://dri.freedesktop.org/wiki/MissingFunctionality
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -87,6 +87,13 @@ created in a <code>lib64</code> directory at the top of the Mesa source
 tree.</p>
 </dd>

+<dt><code>--sysconfdir=DIR</code></dt>
+<dd><p>This option specifies the directory where the configuration
+files will be installed. The default is <code>${prefix}/etc</code>.
+Currently there's only one config file provided when dri drivers are
+enabled - it's <code>drirc</code>.</p>
+</dd>
+
 <dt><code>--enable-static, --disable-shared</code></dt>
 <dd><p>By default, Mesa
 will build shared libraries. Either of these options will force static
@@ -217,7 +224,7 @@ GLX.
 <dt><code>--with-expat=DIR</code>
 <dd><p><strong>DEPRECATED</strong>, use <code>PKG_CONFIG_PATH</code> instead.</p>
 <p>The DRI-enabled libGL uses expat to
-parse the DRI configuration files in <code>/etc/drirc</code> and
+parse the DRI configuration files in <code>${sysconfdir}/drirc</code> and
 <code>~/.drirc</code>. This option allows a specific expat installation
 to be used. For example, <code>--with-expat=/usr/local</code> will
 search for expat headers and libraries in <code>/usr/local/include</code>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -153,6 +153,7 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
   <li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
   <li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
   <li>nodualobj - suppress generation of dual-object geometry shader code</li>
+   <li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
 </ul>
 </ul>

--- a/docs/index.html
+++ b/docs/index.html
@@ -16,25 +16,72 @@

 <h1>News</h1>

-<h2>August 22 2015</h2>
+<h2>October 3, 2015</h2>
+<p>
+<a href="relnotes/10.6.9.html">Mesa 10.6.9</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6
+series. Users of 10.6 are encouraged to migrate to the 11.0 series in order
+to obtain future fixes.
+</p>
+
+<h2>September 28, 2015</h2>
+<p>
+<a href="relnotes/11.0.2.html">Mesa 11.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 26, 2015</h2>
+<p>
+<a href="relnotes/11.0.1.html">Mesa 11.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 20, 2015</h2>
+<p>
+<a href="relnotes/10.6.8.html">Mesa 10.6.8</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 12, 2015</h2>
+<p>
+<a href="relnotes/11.0.0.html">Mesa 11.0.0</a> is released.  This is a new
+development release.  See the release notes for more information about
+the release.
+</p>
+
+<h2>September 10, 2015</h2>
+<p>
+<a href="relnotes/10.6.7.html">Mesa 10.6.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 4, 2015</h2>
+<p>
+<a href="relnotes/10.6.6.html">Mesa 10.6.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 22, 2015</h2>
 <p>
 <a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released.
 This is a bug-fix release.
 </p>

-<h2>August 11 2015</h2>
+<h2>August 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released.
 This is a bug-fix release.
 </p>

-<h2>July 26 2015</h2>
+<h2>July 26, 2015</h2>
 <p>
 <a href="relnotes/10.6.3.html">Mesa 10.6.3</a> is released.
 This is a bug-fix release.
 </p>

-<h2>July 11 2015</h2>
+<h2>July 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.2.html">Mesa 10.6.2</a> is released.
 This is a bug-fix release.
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,13 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>
+<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>
+<li><a href="relnotes/11.0.1.html">11.0.1 release notes</a>
+<li><a href="relnotes/10.6.8.html">10.6.8 release notes</a>
+<li><a href="relnotes/11.0.0.html">11.0.0 release notes</a>
+<li><a href="relnotes/10.6.7.html">10.6.7 release notes</a>
+<li><a href="relnotes/10.6.6.html">10.6.6 release notes</a>
 <li><a href="relnotes/10.6.5.html">10.6.5 release notes</a>
 <li><a href="relnotes/10.6.4.html">10.6.4 release notes</a>
 <li><a href="relnotes/10.6.3.html">10.6.3 release notes</a>
--- a/docs/relnotes/10.6.6.html
+++ b/docs/relnotes/10.6.6.html
@@ -0,0 +1,164 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+416517aa9df4791f97d34451a9e4da33c966afcd18c115c5769b92b15b018ef5  mesa-10.6.6.tar.gz
+570f2154b7340ff5db61ff103bc6e85165b8958798b78a50fa2df488e98e5778  mesa-10.6.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+  <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+  <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+  <li>mesa/texgetimage: fix missing stencil check</li>
+  <li>st/readpixels: fix accel path for skipimages.</li>
+  <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+  <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+  <li>mesa: enable texture stencil8 for multisample</li>
+  <li>r600/sb: update last_cf for finalize if.</li>
+  <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+  <li>st/nine: Require gcc &gt;= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.5</li>
+  <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+  <li>r600g: Fix assert in tgsi_cmp</li>
+  <li>r600g/sb: Handle undef in read port tracker</li>
+  <li>r600g/sb: Don't read junk after EOP</li>
+  <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+  <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+  <li>nv50,nvc0: disable depth bounds test on blit</li>
+  <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+  <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix copy propagation type changes.</li>
+  <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+  <li>mesa: create multisample fallback textures like normal textures</li>
+  <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+  <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+  <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>mesa: update fbo state in glTexStorage</li>
+  <li>glsl: build stageref mask using IR, not symbol table</li>
+  <li>glsl: expose build_program_resource_list function</li>
+  <li>glsl: create program resource list after LinkShader</li>
+  <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.7.html
+++ b/docs/relnotes/10.6.7.html
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.7 Release Notes / September 10, 2015</h1>
+
+<p>
+Mesa 10.6.7 is a bug fix release which fixes bugs found since the 10.6.6 release.
+</p>
+<p>
+Mesa 10.6.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4ba10c59abee30d72476543a57afd2f33803dabf4620dc333b335d47966ff842  mesa-10.6.7.tar.gz
+feb1f640b915dada88a7c793dfaff0ae23580f8903f87a6b76469253de0d28d8  mesa-10.6.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/teximage: use correct extension for accept stencil texture.</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.6</li>
+  <li>Revert "i965: Momentarily pretend to support ARB_texture_stencil8 for blits."</li>
+  <li>Update version to 10.6.7</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Handle attribute aliasing in attribute storage limit check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.8.html
+++ b/docs/relnotes/10.6.8.html
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.8 Release Notes / September 20, 2015</h1>
+
+<p>
+Mesa 10.6.8 is a bug fix release which fixes bugs found since the 10.6.7 release.
+</p>
+<p>
+Mesa 10.6.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1f34dba2a8059782e3e4e0f18b9628004e253b2c69085f735b846d2e63c9e250  mesa-10.6.8.tar.gz
+e36ee5ceeadb3966fb5ce5b4cf18322dbb76a4f075558ae49c3bba94f57d58fd  mesa-10.6.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (1):</p>
+<ul>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Antia Puentes (1):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.7</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+</ul>
+
+<p>Hans de Goede (4):</p>
+<ul>
+  <li>nv30: Fix creation of scanout buffers</li>
+  <li>nv30: Implement color resolve for msaa</li>
+  <li>nv30: Fix max width / height checks in nv30 sifm code</li>
+  <li>nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type</li>
+  <li>mesa: Don't allow wrong type setters for matrix uniforms</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: don't fall back to 16F when 32F is requested</li>
+  <li>nvc0: always emit a full shader colormask</li>
+  <li>nvc0: remove BGRA4 format support</li>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+  <li>gallivm: Workaround LLVM PR23628.</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: convert double to long long instead of unsigned long long</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>gallivm: Do not use NoFramePointerElim with LLVM 3.7.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.9.html
+++ b/docs/relnotes/10.6.9.html
@@ -0,0 +1,130 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.9 Release Notes / October 03, 2015</h1>
+
+<p>
+Mesa 10.6.9 is a bug fix release which fixes bugs found since the 10.6.8 release.
+</p>
+<p>
+Mesa 10.6.9 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+3406876aac67546d0c3e2cb97da330b62644c313e7992b95618662e13c54296a  mesa-10.6.9.tar.gz
+b04c4de6280b863babc2929573da17218d92e9e4ba6272d548d135415723e8c3  mesa-10.6.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
+</ul>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>i965: Remove early release of DRI2 miptree</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.8</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>Update version to 10.6.9</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Kyle Brenneman (3):</p>
+<ul>
+  <li>glx: Fix build errors with --enable-mangling (v2)</li>
+  <li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
+  <li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: fix vui time_scale zero error</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>mesa: fix mipmap generation for immutable, compressed textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.0.html
+++ b/docs/relnotes/11.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 11.0.0 Release Notes / TBD</h1>
+<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>

 <p>
 Mesa 11.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3  mesa-11.0.0.tar.gz
+e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32  mesa-11.0.0.tar.xz
 </pre>


@@ -83,13 +84,175 @@ Note: some of the new features are only available with certain drivers.
 <li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
 </ul>

+
 <h2>Bug fixes</h2>

-TBD.
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston,  shows old framebuffer pieces</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/gglean_fragprog1-z-write_test fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error ndefined reference to `nir_metadata_preserve'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen-&gt;ws-&gt;buffer_from_handle returns NULL)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean&#64;vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fais</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native)  has rendering issues</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings &gt;max_varying_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50&#64;execution&#64;geometry&#64;max-input-components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAdress(&quot;OSMesaPixelStore&quot;) returns nil</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error &quot;Unsupported or undefined LONG_BIT&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+<ul><li>Removed the EGL loader from the Linux SCons build.</li></ul>

 </div>
 </body>
--- a/docs/relnotes/11.0.1.html
+++ b/docs/relnotes/11.0.1.html
@@ -0,0 +1,134 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
+
+<p>
+Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
+</p>
+<p>
+Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6dab262877e12c0546a0e2970c6835a0f217e6d4026ccecb3cd5dd733d1ce867  mesa-11.0.1.tar.gz
+43d0dfcd1f1e36f07f8228cd76d90175d3fc74c1ed25d7071794a100a98ef2a6  mesa-11.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Antia Puentes (2):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+  <li>i965/vec4_nir: Load constants as integers</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.0</li>
+  <li>Update version to 11.0.1</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Ilia Mirkin (6):</p>
+<ul>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+  <li>freedreno/a3xx: fix blending of L8 format</li>
+  <li>nv50,nvc0: detect underlying resource changes and update tic</li>
+  <li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
+  <li>radeonsi: load fmask ptr relative to the resources array</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir: Fix a bunch of ralloc parenting errors</li>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Tapani Pälli (2):</p>
+<ul>
+  <li>mesa: fix errors when reading depth with glReadPixels</li>
+  <li>i965: fix textureGrad for cubemaps</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.2.html
+++ b/docs/relnotes/11.0.2.html
@@ -0,0 +1,85 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.0.2 Release Notes / September 28, 2015</h1>
+
+<p>
+Mesa 11.0.2 is a bug fix release which fixes bugs found since the 11.0.1 release.
+</p>
+<p>
+Mesa 11.0.2 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20  mesa-11.0.2.tar.gz
+fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4  mesa-11.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91582">Bug 91582</a> - [bisected] Regression in DEQP gles2.functional.negative_api.texture.texsubimage2d_neg_offset</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Eduardo Lima Mitev (3):</p>
+<ul>
+  <li>mesa: Fix order of format+type and internal format checks for glTexImageXD ops</li>
+  <li>mesa: Move _mesa_base_tex_format() from teximage to glformats files</li>
+  <li>mesa: Use the effective internal format instead for validation</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 11.0.1</li>
+  <li>Update version to 11.0.2</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -0,0 +1,67 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.1.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 11.1.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.1.1.
+</p>
+<p>
+Mesa 11.1.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
+<li>GL_ARB_shader_storage_buffer_object on i965</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
+<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
+<li>GL_ARB_texture_query_lod on softpipe</li>
+<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
+<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+TBD.
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -63,6 +63,20 @@ execution.  These are generally used for debugging.
 Example:  export MESA_GLSL=dump,nopt
 </p>

+<p>
+Shaders can be dumped and replaced at runtime for debugging purposes. Mesa
+needs to be configured with '--with-sha1' to enable this functionality. This
+feature is not currently supported by the SCons build.
+
+This is controlled via the following environment variables:
+<ul>
+<li><b>MESA_SHADER_DUMP_PATH</b> - path where shader sources are dumped
+<li><b>MESA_SHADER_READ_PATH</b> - path where replacement shaders are read
+</ul>
+Note that each path that is set must already exist before running for dumping
+or replacing to work. When both are set, the paths should be different so the
+dumped shaders do not clobber the replacement shaders.
+</p>

 <h2 id="support">GLSL Version</h2>

--- a/docs/vmware-guest.html
+++ b/docs/vmware-guest.html
@@ -26,6 +26,31 @@ VMware Workstation running on Linux or Windows and VMware Fusion running on
 MacOS are all supported.
 </p>

+<p>
+With the August 2015 Workstation 12 / Fusion 8 releases, OpenGL 3.3
+is supported in the guest.
+This requires:
+<ul>
+<li>The VM is configured for virtual hardware version 12.
+<li>The host OS, GPU and graphics driver support DX11 (Windows) or
+    OpenGL 4.0 (Linux, Mac)
+<li>On Linux, the vmwgfx kernel module must be version 2.9.0 or later.
+<li>A recent version of Mesa with the updated svga gallium driver.
+</ul>
+</p>
+
+<p>
+Otherwise, OpenGL 2.1 is supported.
+</p>
+
+<p>
+OpenGL 3.3 support can be disabled by setting the environment variable
+SVGA_VGPU10=0.
+You will then have OpenGL 2.1 support.
+This may be useful to work around application bugs (such as incorrect use
+of the OpenGL 3.x core profile).
+</p>
+
 <p>
 Most modern Linux distros include the SVGA3D driver so end users shouldn't
 be concerned with this information.
@@ -227,6 +252,16 @@ If you don't see this, try setting this environment variable:
 then rerun glxinfo and examine the output for error messages.
 </p>

+<p>
+If OpenGL 3.3 is not working (you only get OpenGL 2.1):
+</p>
+<ul>
+<li>Make sure the VM uses hardware version 12.
+<li>Make sure the vmwgfx kernel module is version 2.9.0 or later.
+<li>Check the vmware.log file for errors.
+<li>Run 'dmesg | grep vmwgfx' and look for "DX: yes".
+</ul>
+
 </div>
 </body>
 </html>
--- a/include/c11/threads_posix.h
+++ b/include/c11/threads_posix.h
@@ -102,9 +102,8 @@ call_once(once_flag *flag, void (*func)(void))
 static inline int
 cnd_broadcast(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_broadcast(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_broadcast(cond) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.2
@@ -119,18 +118,16 @@ cnd_destroy(cnd_t *cond)
 static inline int
 cnd_init(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_init(cond, NULL);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_init(cond, NULL) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.4
 static inline int
 cnd_signal(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_signal(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_signal(cond) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.5
@@ -139,7 +136,14 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 {
    struct timespec abs_time;
    int rt;
-    if (!cond || !mtx || !xt) return thrd_error;
+
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    assert(xt != NULL);
+
+    abs_time.tv_sec = xt->sec;
+    abs_time.tv_nsec = xt->nsec;
+
    rt = pthread_cond_timedwait(cond, mtx, &abs_time);
    if (rt == ETIMEDOUT)
        return thrd_busy;
@@ -150,9 +154,9 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 static inline int
 cnd_wait(cnd_t *cond, mtx_t *mtx)
 {
-    if (!cond || !mtx) return thrd_error;
-    pthread_cond_wait(cond, mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    return (pthread_cond_wait(cond, mtx) == 0) ? thrd_success : thrd_error;
 }


@@ -161,7 +165,7 @@ cnd_wait(cnd_t *cond, mtx_t *mtx)
 static inline void
 mtx_destroy(mtx_t *mtx)
 {
-    assert(mtx);
+    assert(mtx != NULL);
    pthread_mutex_destroy(mtx);
 }

@@ -170,7 +174,7 @@ static inline int
 mtx_init(mtx_t *mtx, int type)
 {
    pthread_mutexattr_t attr;
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
    if (type != mtx_plain && type != mtx_timed && type != mtx_try
      && type != (mtx_plain|mtx_recursive)
      && type != (mtx_timed|mtx_recursive)
@@ -188,9 +192,8 @@ mtx_init(mtx_t *mtx, int type)
 static inline int
 mtx_lock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_lock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_lock(mtx) == 0) ? thrd_success : thrd_error;
 }

 static inline int
@@ -203,7 +206,9 @@ thrd_yield(void);
 static inline int
 mtx_timedlock(mtx_t *mtx, const xtime *xt)
 {
-    if (!mtx || !xt) return thrd_error;
+    assert(mtx != NULL);
+    assert(xt != NULL);
+
    {
 #ifdef EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
    struct timespec ts;
@@ -233,7 +238,7 @@ mtx_timedlock(mtx_t *mtx, const xtime *xt)
 static inline int
 mtx_trylock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
    return (pthread_mutex_trylock(mtx) == 0) ? thrd_success : thrd_busy;
 }

@@ -241,9 +246,8 @@ mtx_trylock(mtx_t *mtx)
 static inline int
 mtx_unlock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_unlock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_unlock(mtx) == 0) ? thrd_success : thrd_error;
 }


@@ -253,7 +257,7 @@ static inline int
 thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
 {
    struct impl_thrd_param *pack;
-    if (!thr) return thrd_error;
+    assert(thr != NULL);
    pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param));
    if (!pack) return thrd_nomem;
    pack->func = func;
@@ -329,7 +333,7 @@ thrd_yield(void)
 static inline int
 tss_create(tss_t *key, tss_dtor_t dtor)
 {
-    if (!key) return thrd_error;
+    assert(key != NULL);
    return (pthread_key_create(key, dtor) == 0) ? thrd_success : thrd_error;
 }

--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -8,6 +8,7 @@ env = env.Clone()

 env.Append(CPPPATH = [
    '#/include',
+    '#/include/HaikuGL',
    '#/src/egl/main',
    '#/src',
 ])
@@ -15,7 +16,6 @@ env.Append(CPPPATH = [

 # parse Makefile.sources
 egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES'))

 env.Append(CPPDEFINES = [
    '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -27,6 +27,7 @@

 #define WL_HIDE_DEPRECATED

+#include <stdbool.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -130,12 +131,10 @@ const __DRIconfig *
 dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
                    EGLenum colorspace)
 {
-   if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
-      return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
-                                              conf->dri_srgb_single_config;
-   else
-      return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
-                                              conf->dri_single_config;
+   const bool srgb = colorspace == EGL_GL_COLORSPACE_SRGB_KHR;
+
+   return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config[srgb] :
+                                           conf->dri_single_config[srgb];
 }

 static EGLBoolean
@@ -283,14 +282,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
   if (num_configs == 1) {
      conf = (struct dri2_egl_config *) matching_config;

-      if (double_buffer && srgb && !conf->dri_srgb_double_config)
-         conf->dri_srgb_double_config = dri_config;
-      else if (double_buffer && !srgb && !conf->dri_double_config)
-         conf->dri_double_config = dri_config;
-      else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
-         conf->dri_srgb_single_config = dri_config;
-      else if (!double_buffer && !srgb && !conf->dri_single_config)
-         conf->dri_single_config = dri_config;
+      if (double_buffer && !conf->dri_double_config[srgb])
+         conf->dri_double_config[srgb] = dri_config;
+      else if (!double_buffer && !conf->dri_single_config[srgb])
+         conf->dri_single_config[srgb] = dri_config;
      else
         /* a similar config type is already added (unlikely) => discard */
         return NULL;
@@ -300,18 +295,13 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
      if (conf == NULL)
         return NULL;

+      if (double_buffer)
+         conf->dri_double_config[srgb] = dri_config;
+      else
+         conf->dri_single_config[srgb] = dri_config;
+
      memcpy(&conf->base, &base, sizeof base);
-      if (double_buffer) {
-         if (srgb)
-            conf->dri_srgb_double_config = dri_config;
-         else
-            conf->dri_double_config = dri_config;
-      } else {
-         if (srgb)
-            conf->dri_srgb_single_config = dri_config;
-         else
-            conf->dri_single_config = dri_config;
-      }
+      conf->base.SurfaceType = 0;
      conf->base.ConfigID = config_id;

      _eglLinkConfig(&conf->base);
@@ -588,7 +578,8 @@ dri2_setup_screen(_EGLDisplay *disp)
                                   __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
      disp->Extensions.KHR_gl_colorspace = EGL_TRUE;

-   if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
+   if ((dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+       (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
      disp->Extensions.KHR_create_context = EGL_TRUE;

      if (dri2_dpy->robustness)
@@ -784,7 +775,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)

   if (dri2_dpy->own_dri_screen)
      dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
-   if (dri2_dpy->fd)
+   if (dri2_dpy->fd >= 0)
      close(dri2_dpy->fd);
   if (dri2_dpy->driver)
      dlclose(dri2_dpy->driver);
@@ -902,6 +893,55 @@ dri2_create_context_attribs_error(int dri_error)
   _eglError(egl_error, "dri2_create_context");
 }

+static bool
+dri2_fill_context_attribs(struct dri2_egl_context *dri2_ctx,
+                          struct dri2_egl_display *dri2_dpy,
+                          uint32_t *ctx_attribs,
+                          unsigned *num_attribs)
+{
+   int pos = 0;
+
+   assert(*num_attribs >= 8);
+
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMinorVersion;
+
+   if (dri2_ctx->base.Flags != 0) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it the robust-access flag.
+       * It may explode.  Instead, generate the required EGL error here.
+       */
+      if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
+            && !dri2_dpy->robustness) {
+         _eglError(EGL_BAD_MATCH, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_FLAGS;
+      ctx_attribs[pos++] = dri2_ctx->base.Flags;
+   }
+
+   if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it a reset strategy.  It may
+       * explode.  Instead, generate the required EGL error here.
+       */
+      if (!dri2_dpy->robustness) {
+         _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
+      ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
+   }
+
+   *num_attribs = pos;
+
+   return true;
+}
+
 /**
 * Called via eglCreateContext(), drv->API.CreateContext().
 */
@@ -970,10 +1010,10 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
       * doubleBufferMode check in
       * src/mesa/main/context.c:check_compatible()
       */
-      if (dri2_config->dri_double_config)
-         dri_config = dri2_config->dri_double_config;
+      if (dri2_config->dri_double_config[0])
+         dri_config = dri2_config->dri_double_config[0];
      else
-         dri_config = dri2_config->dri_single_config;
+         dri_config = dri2_config->dri_single_config[0];

      /* EGL_WINDOW_BIT is set only when there is a dri_double_config.  This
       * makes sure the back buffer will always be used.
@@ -987,44 +1027,12 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
   if (dri2_dpy->dri2) {
      if (dri2_dpy->dri2->base.version >= 3) {
         unsigned error;
-         unsigned num_attribs = 0;
+         unsigned num_attribs = 8;
         uint32_t ctx_attribs[8];

-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMajorVersion;
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMinorVersion;
-
-         if (dri2_ctx->base.Flags != 0) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it the robust-access flag.
-             * It may explode.  Instead, generate the required EGL error here.
-             */
-            if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
-                && !dri2_dpy->robustness) {
-               _eglError(EGL_BAD_MATCH, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_FLAGS;
-            ctx_attribs[num_attribs++] = dri2_ctx->base.Flags;
-         }
-
-         if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it a reset strategy.  It may
-             * explode.  Instead, generate the required EGL error here.
-             */
-            if (!dri2_dpy->robustness) {
-               _eglError(EGL_BAD_CONFIG, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
-            ctx_attribs[num_attribs++] = __DRI_CTX_RESET_LOSE_CONTEXT;
-         }
-
-         assert(num_attribs <= ARRAY_SIZE(ctx_attribs));
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;

 	 dri2_ctx->dri_context =
 	    dri2_dpy->dri2->createContextAttribs(dri2_dpy->dri_screen,
@@ -1046,12 +1054,33 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
      }
   } else {
      assert(dri2_dpy->swrast);
-      dri2_ctx->dri_context =
-         dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
-                                                  api,
-                                                  dri_config,
-                                                  shared,
-                                                  dri2_ctx);
+      if (dri2_dpy->swrast->base.version >= 3) {
+         unsigned error;
+         unsigned num_attribs = 8;
+         uint32_t ctx_attribs[8];
+
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
+
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createContextAttribs(dri2_dpy->dri_screen,
+                                                   api,
+                                                   dri_config,
+                                                   shared,
+                                                   num_attribs / 2,
+                                                   ctx_attribs,
+                                                   & error,
+                                                   dri2_ctx);
+         dri2_create_context_attribs_error(error);
+      } else {
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
+                                                     api,
+                                                     dri_config,
+                                                     shared,
+                                                     dri2_ctx);
+      }
   }

   if (!dri2_ctx->dri_context)
@@ -2384,13 +2413,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
   unsigned wait_flags = 0;
   EGLint ret = EGL_CONDITION_SATISFIED_KHR;

-   if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
+   /* The EGL_KHR_fence_sync spec states:
+    *
+    *    "If no context is current for the bound API,
+    *     the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored."
+    */
+   if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
      wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;

   /* the sync object should take a reference while waiting */
   dri2_egl_ref_sync(dri2_sync);

-   if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
+   if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
                                         dri2_sync->fence, wait_flags,
                                         timeout))
      dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -284,10 +284,8 @@ struct dri2_egl_surface
 struct dri2_egl_config
 {
   _EGLConfig         base;
-   const __DRIconfig *dri_single_config;
-   const __DRIconfig *dri_double_config;
-   const __DRIconfig *dri_srgb_single_config;
-   const __DRIconfig *dri_srgb_double_config;
+   const __DRIconfig *dri_single_config[2];
+   const __DRIconfig *dri_double_config[2];
 };

 struct dri2_egl_image
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -101,6 +101,7 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
   struct dri2_egl_surface *dri2_surf;
   struct gbm_surface *window = native_window;
   struct gbm_dri_surface *surf;
+   const __DRIconfig *config;

   (void) drv;

@@ -130,21 +131,20 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
      goto cleanup_surf;
   }

-   if (dri2_dpy->dri2) {
-      const __DRIconfig *config =
-         dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
-                             dri2_surf->base.GLColorspace);
+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);

+   if (dri2_dpy->dri2) {
      dri2_surf->dri_drawable =
         (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
                                              dri2_surf->gbm_surf);

   } else {
      assert(dri2_dpy->swrast != NULL);
+
      dri2_surf->dri_drawable =
-         (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                                 dri2_conf->dri_double_config,
-                                                 dri2_surf->gbm_surf);
+         (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                                dri2_surf->gbm_surf);

   }
   if (dri2_surf->dri_drawable == NULL) {
@@ -623,27 +623,19 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
      dri2_dpy->own_device = 1;
      gbm = gbm_create_device(fd);
      if (gbm == NULL)
-         return EGL_FALSE;
+         goto cleanup;
+   } else {
+      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
+      if (fd < 0)
+         goto cleanup;
   }

-   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
+   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+      goto cleanup;

   dri2_dpy->gbm_dri = gbm_dri_device(gbm);
-   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
-
-   if (fd < 0) {
-      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-      if (fd < 0) {
-         free(dri2_dpy);
-         return EGL_FALSE;
-      }
-   }
+   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+      goto cleanup;

   dri2_dpy->fd = fd;
   dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
@@ -727,4 +719,11 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
   dri2_dpy->vtbl = &dri2_drm_display_vtbl;

   return EGL_TRUE;
+
+cleanup:
+   if (fd >= 0)
+      close(fd);
+
+   free(dri2_dpy);
+   return EGL_FALSE;
 }
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1645,6 +1645,7 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
   struct wl_egl_window *window = native_window;
   struct dri2_egl_surface *dri2_surf;
+   const __DRIconfig *config;

   (void) drv;

@@ -1669,10 +1670,12 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   dri2_surf->base.Width = -1;
   dri2_surf->base.Height = -1;

+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);
+
   dri2_surf->dri_drawable =
-      (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                              dri2_conf->dri_double_config,
-                                              dri2_surf);
+      (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen,
+                                             config, dri2_surf);
   if (dri2_surf->dri_drawable == NULL) {
      _eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable");
      goto cleanup_dri_drawable;
@@ -1804,6 +1807,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
      goto cleanup_shm;

+   dri2_dpy->fd = -1;
   dri2_dpy->driver_name = strdup("swrast");
   if (!dri2_load_driver_swrast(disp))
      goto cleanup_shm;
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -206,6 +206,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
   xcb_generic_error_t *error;
   xcb_drawable_t drawable;
   xcb_screen_t *screen;
+   const __DRIconfig *config;

   STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
   drawable = (uintptr_t) native_surface;
@@ -245,19 +246,18 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
      dri2_surf->drawable = drawable;
   }

-   if (dri2_dpy->dri2) {
-      const __DRIconfig *config =
-         dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
+   config = dri2_get_dri_config(dri2_conf, type,
+                                dri2_surf->base.GLColorspace);

+   if (dri2_dpy->dri2) {
      dri2_surf->dri_drawable =
 	 (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
 					      dri2_surf);
   } else {
      assert(dri2_dpy->swrast);
      dri2_surf->dri_drawable = 
-	 (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-						 dri2_conf->dri_double_config,
-						 dri2_surf);
+         (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                                dri2_surf);
   }

   if (dri2_surf->dri_drawable == NULL) {
@@ -1161,6 +1161,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    * Every hardware driver_name is set using strdup. Doing the same in
    * here will allow is to simply free the memory at dri2_terminate().
    */
+   dri2_dpy->fd = -1;
   dri2_dpy->driver_name = strdup("swrast");
   if (!dri2_load_driver_swrast(disp))
      goto cleanup_conn;
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -152,12 +152,51 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,

         /* The EGL_KHR_create_context spec says:
          *
-          *     "Flags are only defined for OpenGL context creation, and
-          *     specifying a flags value other than zero for other types of
-          *     contexts, including OpenGL ES contexts, will generate an
-          *     error."
+          *     "If the EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR flag bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a <debug context> will be created.
+          *     [...]
+          *     In some cases a debug context may be identical to a non-debug
+          *     context. This bit is supported for OpenGL and OpenGL ES
+          *     contexts."
          */
-         if (api != EGL_OPENGL_API && val != 0) {
+         if ((val & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR) &&
+             (api != EGL_OPENGL_API && api != EGL_OPENGL_ES_API)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR flag bit
+          *     is set in EGL_CONTEXT_FLAGS_KHR, then a <forward-compatible>
+          *     context will be created. Forward-compatible contexts are
+          *     defined only for OpenGL versions 3.0 and later. They must not
+          *     support functionality marked as <deprecated> by that version of
+          *     the API, while a non-forward-compatible context must support
+          *     all functionality in that version, deprecated or not. This bit
+          *     is supported for OpenGL contexts, and requesting a
+          *     forward-compatible context for OpenGL versions less than 3.0
+          *     will generate an error."
+          */
+         if ((val & EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR) &&
+             (api != EGL_OPENGL_API || ctx->ClientMajorVersion < 3)) {
+            err = EGL_BAD_ATTRIBUTE;
+            break;
+         }
+
+         /* The EGL_KHR_create_context spec says:
+          *
+          *     "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
+          *     EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
+          *     access> will be created. Robust buffer access is defined in the
+          *     GL_ARB_robustness extension specification, and the resulting
+          *     context must also support either the GL_ARB_robustness
+          *     extension, or a version of OpenGL incorporating equivalent
+          *     functionality. This bit is supported for OpenGL contexts."
+          */
+         if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
+             (api != EGL_OPENGL_API ||
+              !dpy->Extensions.EXT_create_context_robustness)) {
            err = EGL_BAD_ATTRIBUTE;
            break;
         }
--- a/src/egl/wayland/wayland-drm/wayland-drm.c
+++ b/src/egl/wayland/wayland-drm/wayland-drm.c
@@ -197,7 +197,7 @@ drm_authenticate(struct wl_client *client,
 		wl_resource_post_event(resource, WL_DRM_AUTHENTICATED);
 }

-const static struct wl_drm_interface drm_interface = {
+static const struct wl_drm_interface drm_interface = {
 	drm_authenticate,
 	drm_create_buffer,
        drm_create_planar_buffer,
--- a/src/egl/wayland/wayland-egl/wayland-egl.c
+++ b/src/egl/wayland/wayland-egl/wayland-egl.c
@@ -1,3 +1,32 @@
+/*
+ * Copyright © 2011 Kristian Høgsberg
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kristian Høgsberg <krh@bitplanet.net>
+ *    Benjamin Franzke <benjaminfranzke@googlemail.com>
+ */
+
 #include <stdlib.h>

 #include <wayland-client.h>
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -11,6 +11,7 @@ SUBDIRS += auxiliary
 ##

 SUBDIRS += \
+	drivers/ddebug \
 	drivers/noop \
 	drivers/trace \
 	drivers/rbug
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \

 endif

-indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
+PYTHON_GEN =  $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)

-indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(AM_V_at)$(MKDIR_P) indices
-	$(AM_V_GEN) $(PYTHON2) $< > $@
+indices/u_indices_gen.c: indices/u_indices_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@

-util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(AM_V_at)$(MKDIR_P) util
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
+indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@

+util/u_format_table.c: util/u_format_table.py \
+                       util/u_format_pack.py \
+                       util/u_format_parse.py \
+                       util/u_format.csv
+	$(MKDIR_GEN)
+	$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@

 noinst_LTLIBRARIES += libgalliumvl_stub.la
 libgalliumvl_stub_la_SOURCES = \
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -129,12 +129,16 @@ C_SOURCES := \
 	rtasm/rtasm_execmem.h \
 	rtasm/rtasm_x86sse.c \
 	rtasm/rtasm_x86sse.h \
+	tgsi/tgsi_aa_point.c \
+	tgsi/tgsi_aa_point.h \
 	tgsi/tgsi_build.c \
 	tgsi/tgsi_build.h \
 	tgsi/tgsi_dump.c \
 	tgsi/tgsi_dump.h \
 	tgsi/tgsi_exec.c \
 	tgsi/tgsi_exec.h \
+	tgsi/tgsi_emulate.c \
+	tgsi/tgsi_emulate.h \
 	tgsi/tgsi_info.c \
 	tgsi/tgsi_info.h \
 	tgsi/tgsi_iterate.c \
@@ -144,6 +148,8 @@ C_SOURCES := \
 	tgsi/tgsi_opcode_tmp.h \
 	tgsi/tgsi_parse.c \
 	tgsi/tgsi_parse.h \
+	tgsi/tgsi_point_sprite.c \
+	tgsi/tgsi_point_sprite.h \
 	tgsi/tgsi_sanity.c \
 	tgsi/tgsi_sanity.h \
 	tgsi/tgsi_scan.c \
@@ -154,6 +160,8 @@ C_SOURCES := \
 	tgsi/tgsi_text.h \
 	tgsi/tgsi_transform.c \
 	tgsi/tgsi_transform.h \
+	tgsi/tgsi_two_side.c \
+	tgsi/tgsi_two_side.h \
 	tgsi/tgsi_ureg.c \
 	tgsi/tgsi_ureg.h \
 	tgsi/tgsi_util.c \
@@ -260,6 +268,8 @@ C_SOURCES := \
 	util/u_pack_color.h \
 	util/u_pointer.h \
 	util/u_prim.h \
+	util/u_prim_restart.c \
+	util/u_prim_restart.h \
 	util/u_pstipple.c \
 	util/u_pstipple.h \
 	util/u_range.h \
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -240,7 +240,8 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);

   /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
-   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_Y, TRUE);

   /* compute coverage factor = (1-d)/(1-k) */

--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -280,7 +280,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)

   /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
   tgsi_transform_kill_inst(ctx,
-                            TGSI_FILE_TEMPORARY, pctx->texTemp, TGSI_SWIZZLE_W);
+                            TGSI_FILE_TEMPORARY, pctx->texTemp,
+                            TGSI_SWIZZLE_W, TRUE);
 }


--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
   else {
      double dscale = lp_const_scale(type);

-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
   }

   return elem;
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -81,6 +81,8 @@
 #  pragma pop_macro("DEBUG")
 #endif

+#include "c11/threads.h"
+#include "os/os_thread.h"
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
@@ -103,6 +105,33 @@ static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;

 }

+static once_flag init_native_targets_once_flag;
+
+static void init_native_targets()
+{
+   // If we have a native target, initialize it to ensure it is linked in and
+   // usable by the JIT.
+   llvm::InitializeNativeTarget();
+
+   llvm::InitializeNativeTargetAsmPrinter();
+
+   llvm::InitializeNativeTargetDisassembler();
+}
+
+/**
+ * The llvm target registry is not thread-safe, so drivers and state-trackers
+ * that want to initialize targets should use the gallivm_init_llvm_targets()
+ * function to safely initialize targets.
+ *
+ * LLVM targets should be initialized before the driver or state-tracker tries
+ * to access the registry.
+ */
+extern "C" void
+gallivm_init_llvm_targets(void)
+{
+   call_once(&init_native_targets_once_flag, init_native_targets);
+}
+
 extern "C" void
 lp_set_target_options(void)
 {
@@ -115,13 +144,7 @@ lp_set_target_options(void)
   llvm::DisablePrettyStackTrace = true;
 #endif

-   // If we have a native target, initialize it to ensure it is linked in and
-   // usable by the JIT.
-   llvm::InitializeNativeTarget();
-
-   llvm::InitializeNativeTargetAsmPrinter();
-
-   llvm::InitializeNativeTargetDisassembler();
+   gallivm_init_llvm_targets();
 }


--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -41,6 +41,8 @@ extern "C" {

 struct lp_generated_code;

+extern void
+gallivm_init_llvm_targets(void);

 extern void
 lp_set_target_options(void);
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -24,9 +24,10 @@

 #include "util/ralloc.h"
 #include "glsl/nir/nir.h"
+#include "glsl/nir/nir_control_flow.h"
 #include "glsl/nir/nir_builder.h"
 #include "glsl/list.h"
-#include "glsl/shader_enums.h"
+#include "glsl/nir/shader_enums.h"

 #include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_parse.h"
@@ -64,24 +65,24 @@ struct ttn_compile {
   nir_register *addr_reg;

   /**
-    * Stack of cf_node_lists where instructions should be pushed as we pop
+    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
    * instructions should be placed, and if_stack[if_stack_pos - 1] has where
    * the next instructions outside of the if/then/else block go.
    */
-   struct exec_list **if_stack;
+   nir_cursor *if_stack;
   unsigned if_stack_pos;

   /**
-    * Stack of cf_node_lists where instructions should be pushed as we pop
+    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
    * of the loop.
    */
-   struct exec_list **loop_stack;
+   nir_cursor *loop_stack;
   unsigned loop_stack_pos;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
@@ -93,6 +94,128 @@ struct ttn_compile {
 #define ttn_channel(b, src, swiz) \
   nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)

+static gl_varying_slot
+tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
+{
+   switch (semantic) {
+   case TGSI_SEMANTIC_POSITION:
+      return VARYING_SLOT_POS;
+   case TGSI_SEMANTIC_COLOR:
+      if (index == 0)
+         return VARYING_SLOT_COL0;
+      else
+         return VARYING_SLOT_COL1;
+   case TGSI_SEMANTIC_BCOLOR:
+      if (index == 0)
+         return VARYING_SLOT_BFC0;
+      else
+         return VARYING_SLOT_BFC1;
+   case TGSI_SEMANTIC_FOG:
+      return VARYING_SLOT_FOGC;
+   case TGSI_SEMANTIC_PSIZE:
+      return VARYING_SLOT_PSIZ;
+   case TGSI_SEMANTIC_GENERIC:
+      return VARYING_SLOT_VAR0 + index;
+   case TGSI_SEMANTIC_FACE:
+      return VARYING_SLOT_FACE;
+   case TGSI_SEMANTIC_EDGEFLAG:
+      return VARYING_SLOT_EDGE;
+   case TGSI_SEMANTIC_PRIMID:
+      return VARYING_SLOT_PRIMITIVE_ID;
+   case TGSI_SEMANTIC_CLIPDIST:
+      if (index == 0)
+         return VARYING_SLOT_CLIP_DIST0;
+      else
+         return VARYING_SLOT_CLIP_DIST1;
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      return VARYING_SLOT_CLIP_VERTEX;
+   case TGSI_SEMANTIC_TEXCOORD:
+      return VARYING_SLOT_TEX0 + index;
+   case TGSI_SEMANTIC_PCOORD:
+      return VARYING_SLOT_PNTC;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return VARYING_SLOT_VIEWPORT;
+   case TGSI_SEMANTIC_LAYER:
+      return VARYING_SLOT_LAYER;
+   default:
+      fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
+      abort();
+   }
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * VARYING_SLOT_
+ */
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [VARYING_SLOT_POS] = { TGSI_SEMANTIC_POSITION, 0 },
+      [VARYING_SLOT_COL0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [VARYING_SLOT_COL1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [VARYING_SLOT_BFC0] = { TGSI_SEMANTIC_BCOLOR, 0 },
+      [VARYING_SLOT_BFC1] = { TGSI_SEMANTIC_BCOLOR, 1 },
+      [VARYING_SLOT_FOGC] = { TGSI_SEMANTIC_FOG, 0 },
+      [VARYING_SLOT_PSIZ] = { TGSI_SEMANTIC_PSIZE, 0 },
+      [VARYING_SLOT_FACE] = { TGSI_SEMANTIC_FACE, 0 },
+      [VARYING_SLOT_EDGE] = { TGSI_SEMANTIC_EDGEFLAG, 0 },
+      [VARYING_SLOT_PRIMITIVE_ID] = { TGSI_SEMANTIC_PRIMID, 0 },
+      [VARYING_SLOT_CLIP_DIST0] = { TGSI_SEMANTIC_CLIPDIST, 0 },
+      [VARYING_SLOT_CLIP_DIST1] = { TGSI_SEMANTIC_CLIPDIST, 1 },
+      [VARYING_SLOT_CLIP_VERTEX] = { TGSI_SEMANTIC_CLIPVERTEX, 0 },
+      [VARYING_SLOT_PNTC] = { TGSI_SEMANTIC_PCOORD, 0 },
+      [VARYING_SLOT_VIEWPORT] = { TGSI_SEMANTIC_VIEWPORT_INDEX, 0 },
+      [VARYING_SLOT_LAYER] = { TGSI_SEMANTIC_LAYER, 0 },
+   };
+
+   if (slot >= VARYING_SLOT_VAR0) {
+      *semantic_name = TGSI_SEMANTIC_GENERIC;
+      *semantic_index = slot - VARYING_SLOT_VAR0;
+      return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+      *semantic_name = TGSI_SEMANTIC_TEXCOORD;
+      *semantic_index = slot - VARYING_SLOT_TEX0;
+      return;
+   }
+
+   if (slot >= ARRAY_SIZE(map)) {
+      fprintf(stderr, "Unknown varying slot %d\n", slot);
+      abort();
+   }
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * FRAG_RESULT_
+ */
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = {
+      [FRAG_RESULT_DEPTH] = { TGSI_SEMANTIC_POSITION, 0 },
+      [FRAG_RESULT_COLOR] = { TGSI_SEMANTIC_COLOR, -1 },
+      [FRAG_RESULT_DATA0 + 0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [FRAG_RESULT_DATA0 + 1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [FRAG_RESULT_DATA0 + 2] = { TGSI_SEMANTIC_COLOR, 2 },
+      [FRAG_RESULT_DATA0 + 3] = { TGSI_SEMANTIC_COLOR, 3 },
+      [FRAG_RESULT_DATA0 + 4] = { TGSI_SEMANTIC_COLOR, 4 },
+      [FRAG_RESULT_DATA0 + 5] = { TGSI_SEMANTIC_COLOR, 5 },
+      [FRAG_RESULT_DATA0 + 6] = { TGSI_SEMANTIC_COLOR, 6 },
+      [FRAG_RESULT_DATA0 + 7] = { TGSI_SEMANTIC_COLOR, 7 },
+   };
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
 static nir_ssa_def *
 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
 {
@@ -215,12 +338,15 @@ ttn_emit_declaration(struct ttn_compile *c)
            var->data.mode = nir_var_shader_in;
            var->name = ralloc_asprintf(var, "in_%d", idx);

-            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-             * instead, but nothing in NIR core is looking at the value
-             * currently, and this is less change to drivers.
-             */
-            var->data.location = decl->Semantic.Name;
-            var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(decl->Semantic.Name,
+                                                decl->Semantic.Index);
+            } else {
+               assert(!decl->Declaration.Semantic);
+               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
+            }
+            var->data.index = 0;

            /* We definitely need to translate the interpolation field, because
             * nir_print will decode it.
@@ -240,6 +366,8 @@ ttn_emit_declaration(struct ttn_compile *c)
            exec_list_push_tail(&b->shader->inputs, &var->node);
            break;
         case TGSI_FILE_OUTPUT: {
+            int semantic_name = decl->Semantic.Name;
+            int semantic_index = decl->Semantic.Index;
            /* Since we can't load from outputs in the IR, we make temporaries
             * for the outputs and emit stores to the real outputs at the end of
             * the shader.
@@ -251,14 +379,40 @@ ttn_emit_declaration(struct ttn_compile *c)

            var->data.mode = nir_var_shader_out;
            var->name = ralloc_asprintf(var, "out_%d", idx);
+            var->data.index = 0;

-            var->data.location = decl->Semantic.Name;
-            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
-                decl->Semantic.Index == 0 &&
-                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-               var->data.index = -1;
-            else
-               var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               switch (semantic_name) {
+               case TGSI_SEMANTIC_COLOR: {
+                  /* TODO tgsi loses some information, so we cannot
+                   * actually differentiate here between DSB and MRT
+                   * at this point.  But so far no drivers using tgsi-
+                   * to-nir support dual source blend:
+                   */
+                  bool dual_src_blend = false;
+                  if (dual_src_blend && (semantic_index == 1)) {
+                     var->data.location = FRAG_RESULT_DATA0;
+                     var->data.index = 1;
+                  } else {
+                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+                        var->data.location = FRAG_RESULT_COLOR;
+                     else
+                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
+                  }
+                  break;
+               }
+               case TGSI_SEMANTIC_POSITION:
+                  var->data.location = FRAG_RESULT_DEPTH;
+                  break;
+               default:
+                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
+                          decl->Semantic.Name, decl->Semantic.Index);
+                  abort();
+               }
+            } else {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
+            }

            if (is_array) {
               unsigned j;
@@ -307,7 +461,7 @@ ttn_emit_immediate(struct ttn_compile *c)
   for (i = 0; i < 4; i++)
      load_const->value.u[i] = tgsi_imm->u[i].Uint;

-   nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr);
+   nir_builder_instr_insert(b, &load_const->instr);
 }

 static nir_src
@@ -363,7 +517,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
         load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);

         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+         nir_builder_instr_insert(b, &load->instr);

         src = nir_src_for_ssa(&load->dest.ssa);

@@ -414,7 +568,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
      load->num_components = ncomp;

      nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
-      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
@@ -476,7 +630,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
         srcn++;
      }
      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
@@ -552,7 +706,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)

         load->dest = nir_dest_for_reg(reg);

-         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+         nir_builder_instr_insert(b, &load->instr);
      } else {
         assert(!tgsi_dst->Indirect);
         dest.dest.reg.reg = c->temp_regs[index].reg;
@@ -667,7 +821,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
@@ -683,7 +837,7 @@ ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
   mov->src[0].src = nir_src_for_ssa(def);
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
-   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
+   nir_builder_instr_insert(b, &mov->instr);
 }

 static void
@@ -902,7 +1056,7 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+   nir_builder_instr_insert(b, &discard->instr);
 }

 static void
@@ -912,7 +1066,7 @@ ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+   nir_builder_instr_insert(b, &discard->instr);
 }

 static void
@@ -920,10 +1074,6 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
 {
   nir_builder *b = &c->build;

-   /* Save the outside-of-the-if-statement node list. */
-   c->if_stack[c->if_stack_pos] = b->cf_node_list;
-   c->if_stack_pos++;
-
   src = ttn_channel(b, src, X);

   nir_if *if_stmt = nir_if_create(b->shader);
@@ -932,11 +1082,14 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
   } else {
      if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0)));
   }
-   nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node);
+   nir_builder_cf_insert(b, &if_stmt->cf_node);

-   nir_builder_insert_after_cf_list(b, &if_stmt->then_list);
+   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
+   c->if_stack_pos++;

-   c->if_stack[c->if_stack_pos] = &if_stmt->else_list;
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
   c->if_stack_pos++;
 }

@@ -945,7 +1098,7 @@ ttn_else(struct ttn_compile *c)
 {
   nir_builder *b = &c->build;

-   nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]);
+   b->cursor = c->if_stack[c->if_stack_pos - 1];
 }

 static void
@@ -954,7 +1107,7 @@ ttn_endif(struct ttn_compile *c)
   nir_builder *b = &c->build;

   c->if_stack_pos -= 2;
-   nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]);
+   b->cursor = c->if_stack[c->if_stack_pos];
 }

 static void
@@ -962,28 +1115,27 @@ ttn_bgnloop(struct ttn_compile *c)
 {
   nir_builder *b = &c->build;

-   /* Save the outside-of-the-loop node list. */
-   c->loop_stack[c->loop_stack_pos] = b->cf_node_list;
+   nir_loop *loop = nir_loop_create(b->shader);
+   nir_builder_cf_insert(b, &loop->cf_node);
+
+   c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
   c->loop_stack_pos++;

-   nir_loop *loop = nir_loop_create(b->shader);
-   nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node);
-
-   nir_builder_insert_after_cf_list(b, &loop->body);
+   b->cursor = nir_after_cf_list(&loop->body);
 }

 static void
 ttn_cont(nir_builder *b)
 {
   nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
 ttn_brk(nir_builder *b)
 {
   nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
@@ -992,7 +1144,7 @@ ttn_endloop(struct ttn_compile *c)
   nir_builder *b = &c->build;

   c->loop_stack_pos--;
-   nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]);
+   b->cursor = c->loop_stack[c->loop_stack_pos];
 }

 static void
@@ -1279,7 +1431,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
@@ -1318,10 +1470,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
   txs->src[0].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr);
+   nir_builder_instr_insert(b, &txs->instr);

   nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr);
+   nir_builder_instr_insert(b, &qlv->instr);

   ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
   ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
@@ -1730,7 +1882,7 @@ ttn_emit_instruction(struct ttn_compile *c)
      store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
      store->src[0] = nir_src_for_reg(dest.dest.reg.reg);

-      nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+      nir_builder_instr_insert(b, &store->instr);
   }
 }

@@ -1759,11 +1911,26 @@ ttn_add_output_stores(struct ttn_compile *c)
         store->const_index[0] = loc;
         store->src[0].reg.reg = c->output_regs[loc].reg;
         store->src[0].reg.base_offset = c->output_regs[loc].offset;
-         nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+         nir_builder_instr_insert(b, &store->instr);
      }
   }
 }

+static gl_shader_stage
+tgsi_processor_to_shader_stage(unsigned processor)
+{
+   switch (processor) {
+   case TGSI_PROCESSOR_FRAGMENT:  return MESA_SHADER_FRAGMENT;
+   case TGSI_PROCESSOR_VERTEX:    return MESA_SHADER_VERTEX;
+   case TGSI_PROCESSOR_GEOMETRY:  return MESA_SHADER_GEOMETRY;
+   case TGSI_PROCESSOR_TESS_CTRL: return MESA_SHADER_TESS_CTRL;
+   case TGSI_PROCESSOR_TESS_EVAL: return MESA_SHADER_TESS_EVAL;
+   case TGSI_PROCESSOR_COMPUTE:   return MESA_SHADER_COMPUTE;
+   default:
+      unreachable("invalid TGSI processor");
+   };
+}
+
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
            const nir_shader_compiler_options *options)
@@ -1775,17 +1942,19 @@ tgsi_to_nir(const void *tgsi_tokens,
   int ret;

   c = rzalloc(NULL, struct ttn_compile);
-   s = nir_shader_create(NULL, options);
+
+   tgsi_scan_shader(tgsi_tokens, &scan);
+   c->scan = &scan;
+
+   s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
+                         options);

   nir_function *func = nir_function_create(s, "main");
   nir_function_overload *overload = nir_function_overload_create(func);
   nir_function_impl *impl = nir_function_impl_create(overload);

   nir_builder_init(&c->build, impl);
-   nir_builder_insert_after_cf_list(&c->build, &impl->body);
-
-   tgsi_scan_shader(tgsi_tokens, &scan);
-   c->scan = &scan;
+   c->build.cursor = nir_after_cf_list(&impl->body);

   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
@@ -1801,10 +1970,10 @@ tgsi_to_nir(const void *tgsi_tokens,
   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

-   c->if_stack = rzalloc_array(c, struct exec_list *,
+   c->if_stack = rzalloc_array(c, nir_cursor,
                               (scan.opcode_count[TGSI_OPCODE_IF] +
                                scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
-   c->loop_stack = rzalloc_array(c, struct exec_list *,
+   c->loop_stack = rzalloc_array(c, nir_cursor,
                                 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);

   ret = tgsi_parse_init(&parser, tgsi_tokens);
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.h
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h
@@ -28,3 +28,9 @@ struct nir_shader_compiler_options *options;
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
            const struct nir_shader_compiler_options *options);
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index);
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index);
--- a/src/gallium/auxiliary/os/os_misc.c
+++ b/src/gallium/auxiliary/os/os_misc.c
@@ -96,11 +96,13 @@ os_log_message(const char *message)
 }


+#if !defined(PIPE_SUBSYSTEM_EMBEDDED)
 const char *
 os_get_option(const char *name)
 {
   return getenv(name);
 }
+#endif /* !PIPE_SUBSYSTEM_EMBEDDED */


 /**
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
                        unsigned bypass_usage,
                        uint64_t maximum_cache_size);

+/**
+ * Remove a buffer from the cache, but keep it alive.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);

 struct pb_fence_ops;

--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
 }


+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;
+   }
+   buf->mgr = NULL;
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (!mgr)
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
 * Actually destroy the buffer.
 */
 static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
   assert(!pipe_is_referenced(&buf->base.reference));
   pb_reference(&buf->buffer, NULL);
   FREE(buf);
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
   struct pb_cache_manager *mgr = buf->mgr;

+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
   pipe_mutex_lock(mgr->mutex);
   assert(!pipe_is_referenced(&buf->base.reference));
   
--- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -11,6 +11,10 @@
 * one or more debug driver: rbug, trace.
 */

+#ifdef GALLIUM_DDEBUG
+#include "ddebug/dd_public.h"
+#endif
+
 #ifdef GALLIUM_TRACE
 #include "trace/tr_public.h"
 #endif
@@ -30,6 +34,10 @@
 static inline struct pipe_screen *
 debug_screen_wrap(struct pipe_screen *screen)
 {
+#if defined(GALLIUM_DDEBUG)
+   screen = ddebug_screen_create(screen);
+#endif
+
 #if defined(GALLIUM_RBUG)
   screen = rbug_screen_create(screen);
 #endif
--- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the fragment shader to support anti-aliased points.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_aa_point.h"
+#include "tgsi_transform.h"
+
+#define INVALID_INDEX 9999
+
+struct aa_transform_context
+{
+   struct tgsi_transform_context base;
+
+   unsigned tmp;           // temp register
+   unsigned color_out;     // frag color out register
+   unsigned color_tmp;     // frag color temp register
+   unsigned num_tmp;       // number of temp registers
+   unsigned num_imm;       // number of immediates
+   unsigned num_input;     // number of inputs
+   unsigned aa_point_coord_index;
+};
+
+static inline struct aa_transform_context *
+aa_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct aa_transform_context *) ctx;
+}
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+aa_decl(struct tgsi_transform_context *ctx,
+              struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
+         ts->color_out = decl->Range.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      ts->num_input++;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+aa_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+aa_prolog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned tmp0;
+   unsigned texIn;
+   unsigned imm;
+
+   /* Declare two temporary registers, one for scratch values and
+    * one for the fragment color.
+    */
+   ts->tmp = ts->num_tmp++;
+   ts->color_tmp = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->tmp, ts->color_tmp);
+
+   /* Declare new generic input/texcoord */
+   texIn = ts->num_input++;
+   tgsi_transform_input_decl(ctx, texIn, TGSI_SEMANTIC_GENERIC,
+                             ts->aa_point_coord_index, TGSI_INTERPOLATE_LINEAR);
+
+   /* Declare extra immediates */
+   imm = ts->num_imm++;
+   tgsi_transform_immediate_decl(ctx, 0.5, 0.5, 0.45, 1.0);
+
+   /*
+    * Emit code to compute fragment coverage.
+    * The point always has radius 0.5.  The threshold value will be a
+    * value less than, but close to 0.5, such as 0.45.
+    * We compute a coverage factor from the distance and threshold.
+    * If the coverage is negative, the fragment is outside the circle and
+    * it's discarded.
+    * If the coverage is >= 1, the fragment is fully inside the threshold
+    * distance.  We limit/clamp the coverage to 1.
+    * Otherwise, the fragment is between the threshold value and 0.5 and we
+    * compute a coverage value in [0,1].
+    *
+    * Input reg (texIn) usage:
+    *  texIn.x = x point coord in [0,1]
+    *  texIn.y = y point coord in [0,1]
+    *  texIn.z = "k" the smoothing threshold distance
+    *  texIn.w = unused
+    *
+    * Temp reg (t0) usage:
+    *  t0.x = x offset from center, then distance of fragment from center
+    *  t0.y = y offset from center, then 0.5 - distance
+    *  t0.z = unused
+    *  t0.w = 0.5 - k, then the final coverage value
+    */
+
+   tmp0 = ts->tmp;
+
+   /* SUB t0.xy, texIn, (0.5, 0.5) */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
+                           TGSI_FILE_INPUT, texIn,
+                           TGSI_FILE_IMMEDIATE, imm);
+
+   /* DP2 t0.x, t0.xy, t0.xy;  # t0.x = x^2 + y^2 */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* SQRT t0.x, t0.x */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* compute coverage factor = (0.5-d)/(0.5-k) */
+
+   /* SUB t0.w, 0.5, texIn.z;  # t0.w = 0.5-k */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z);
+
+   /* SUB t0.y, 0.5, t0.x;  # t0.y = 0.5-d */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
+
+   /* DIV t0.w, t0.y, t0.w;  # coverage = (0.5-d)/(0.5-k) */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
+
+   /* If the coverage value is negative, it means the fragment is outside
+    * the point's circular boundary.  Kill it.
+    */
+   /* KILL_IF tmp0.w;  # if tmp0.w < 0 KILL */
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_W, FALSE);
+
+   /* If the distance is less than the threshold, the coverage/alpha value
+    * will be greater than one.  Clamp to one here.
+    */
+   /* MIN tmp0.w, tmp0.w, 1.0 */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W);
+}
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+aa_inst(struct tgsi_transform_context *ctx,
+        struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned i;
+
+   /* Look for writes to color output reg and replace it with
+    * color temp reg.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      if (dst->Register.File == TGSI_FILE_OUTPUT &&
+          dst->Register.Index == ts->color_out) {
+         dst->Register.File = TGSI_FILE_TEMPORARY;
+         dst->Register.Index = ts->color_tmp;
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+/**
+ * TGSI transform epilog callback.
+ */
+static void
+aa_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   /* add alpha modulation code at tail of program */
+   assert(ts->color_out != INVALID_INDEX);
+   assert(ts->color_tmp != INVALID_INDEX);
+
+   /* MOV output.color.xyz colorTmp */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_XYZ,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp);
+
+   /* MUL output.color.w colorTmp.w tmp0.w */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_W,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp,
+                           TGSI_FILE_TEMPORARY, ts->tmp);
+}
+
+/**
+ * TGSI utility to transform a fragment shader to support antialiased points.
+ *
+ * This utility accepts two inputs:
+ *\param tokens_in  -- the original token string of the shader
+ *\param aa_point_coord_index -- the semantic index of the generic register
+ *                            that contains the point sprite texture coord
+ *
+ * For each fragment in the point, we compute the distance of the fragment
+ * from the point center using the point sprite texture coordinates.
+ * If the distance is greater than 0.5, we'll discard the fragment.
+ * Otherwise, we'll compute a coverage value which approximates how much
+ * of the fragment is inside the bounding circle of the point. If the distance
+ * is less than 'k', the coverage is 1. Else, the coverage is between 0 and 1.
+ * The final fragment color's alpha channel is then modulated by the coverage
+ * value.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index)
+{
+   struct aa_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = aa_decl;
+   transform.base.transform_instruction = aa_inst;
+   transform.base.transform_immediate = aa_immediate;
+   transform.base.prolog = aa_prolog;
+   transform.base.epilog = aa_epilog;
+
+   transform.tmp = INVALID_INDEX;
+   transform.color_out = INVALID_INDEX;
+   transform.color_tmp = INVALID_INDEX;
+
+   assert(aa_point_coord_index != -1);
+   transform.aa_point_coord_index = (unsigned)aa_point_coord_index;
+
+   transform.num_tmp = 0;
+   transform.num_imm = 0;
+   transform.num_input = 0;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_AA_POINT_H
+#define TGSI_AA_POINT_H
+
+/* Forward declaration: this header only passes tokens around by pointer. */
+struct tgsi_token;
+
+/**
+ * Create a transformed copy of a shader that implements anti-aliased points.
+ *
+ * \param tokens_in  the shader to transform; it is not modified
+ * \param aa_point_coord_index  the semantic index of the generic register
+ *                              that contains the point sprite texture coord;
+ *                              must not be -1 (asserted)
+ * \return a newly allocated token array, or NULL if the allocation fails.
+ *         NOTE(review): presumably the caller owns and frees the result --
+ *         confirm against the callers.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index);
+
+#endif /* TGSI_AA_POINT_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_emulate.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_emulate.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+#include "tgsi_emulate.h"
+
+/**
+ * State shared by the tgsi_emulate() transform callbacks.
+ */
+struct tgsi_emulation_context {
+   /* Must stay the first member: the callbacks receive a pointer to it and
+    * cast that pointer back to the whole context.
+    */
+   struct tgsi_transform_context base;
+   /* Result of tgsi_scan_shader() on the input tokens. */
+   struct tgsi_shader_info info;
+   /* TGSI_EMU_* bits requested by the caller. */
+   unsigned flags;
+   /* Set by transform_instr() on its first call so that the edgeflag
+    * passthrough is emitted at most once.
+    */
+   bool first_instruction_emitted;
+};
+
+/* Recover the emulation context from a pointer to its embedded base. */
+static inline struct tgsi_emulation_context *
+tgsi_emulation_context(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_emulation_context *ectx =
+      (struct tgsi_emulation_context *)tctx;
+
+   return ectx;
+}
+
+/**
+ * Declaration callback.
+ *
+ * When TGSI_EMU_FORCE_PERSAMPLE_INTERP is requested, every input
+ * declaration gets its interpolation location switched to per-sample.
+ * The (possibly modified) declaration is then forwarded.
+ */
+static void
+transform_decl(struct tgsi_transform_context *tctx,
+               struct tgsi_full_declaration *decl)
+{
+   struct tgsi_emulation_context *ectx = tgsi_emulation_context(tctx);
+   const bool force_persample =
+      (ectx->flags & TGSI_EMU_FORCE_PERSAMPLE_INTERP) != 0;
+
+   if (force_persample && decl->Declaration.File == TGSI_FILE_INPUT) {
+      /* Inputs are expected to carry interpolation info already. */
+      assert(decl->Declaration.Interpolate);
+      decl->Interp.Location = TGSI_INTERPOLATE_LOC_SAMPLE;
+   }
+
+   tctx->emit_declaration(tctx, decl);
+}
+
+/**
+ * Emit one extra input, one extra EDGEFLAG output and a MOV that copies
+ * the X component of that input into all channels of that output.
+ */
+static void
+passthrough_edgeflag(struct tgsi_transform_context *tctx)
+{
+   struct tgsi_emulation_context *ectx = tgsi_emulation_context(tctx);
+   /* The new registers are numbered with the scanned input/output counts. */
+   const unsigned in_index = ectx->info.num_inputs;
+   const unsigned out_index = ectx->info.num_outputs;
+   struct tgsi_full_declaration in_decl = tgsi_default_full_declaration();
+   struct tgsi_full_declaration out_decl = tgsi_default_full_declaration();
+   struct tgsi_full_instruction mov = tgsi_default_full_instruction();
+
+   /* DCL IN[in_index] */
+   in_decl.Declaration.File = TGSI_FILE_INPUT;
+   in_decl.Range.First = in_index;
+   in_decl.Range.Last = in_index;
+   tctx->emit_declaration(tctx, &in_decl);
+
+   /* DCL OUT[out_index], EDGEFLAG */
+   out_decl.Declaration.File = TGSI_FILE_OUTPUT;
+   out_decl.Declaration.Semantic = true;
+   out_decl.Semantic.Name = TGSI_SEMANTIC_EDGEFLAG;
+   out_decl.Semantic.Index = 0;
+   out_decl.Range.First = out_index;
+   out_decl.Range.Last = out_index;
+   tctx->emit_declaration(tctx, &out_decl);
+
+   /* MOV OUT[out_index], IN[in_index].xxxx */
+   mov.Instruction.Opcode = TGSI_OPCODE_MOV;
+   mov.Instruction.NumDstRegs = 1;
+   mov.Instruction.NumSrcRegs = 1;
+
+   mov.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+   mov.Dst[0].Register.Index = out_index;
+   mov.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+
+   mov.Src[0].Register.File = TGSI_FILE_INPUT;
+   mov.Src[0].Register.Index = in_index;
+   mov.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+   mov.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+   mov.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+   mov.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+
+   tctx->emit_instruction(tctx, &mov);
+}
+
+/**
+ * Instruction callback.
+ *
+ * On the first instruction, optionally emits the edgeflag passthrough in
+ * front of it.  With TGSI_EMU_CLAMP_COLOR_OUTPUTS, instructions that write
+ * a COLOR or BCOLOR output are marked as saturating.  The instruction is
+ * then forwarded.
+ */
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *inst)
+{
+   struct tgsi_emulation_context *ectx = tgsi_emulation_context(tctx);
+
+   /* One-time work placed ahead of the shader's first instruction. */
+   if (!ectx->first_instruction_emitted) {
+      ectx->first_instruction_emitted = true;
+
+      if (ectx->flags & TGSI_EMU_PASSTHROUGH_EDGEFLAG)
+         passthrough_edgeflag(tctx);
+   }
+
+   if (ectx->flags & TGSI_EMU_CLAMP_COLOR_OUTPUTS) {
+      int d;
+
+      for (d = 0; d < inst->Instruction.NumDstRegs; d++) {
+         /* Only directly addressed outputs are looked up by index. */
+         if (inst->Dst[d].Register.File == TGSI_FILE_OUTPUT &&
+             !inst->Dst[d].Register.Indirect) {
+            const unsigned name =
+               ectx->info.output_semantic_name[inst->Dst[d].Register.Index];
+
+            if (name == TGSI_SEMANTIC_COLOR ||
+                name == TGSI_SEMANTIC_BCOLOR)
+               inst->Instruction.Saturate = true;
+         }
+      }
+   }
+
+   tctx->emit_instruction(tctx, inst);
+}
+
+/**
+ * Return a transformed copy of a shader with the requested TGSI_EMU_*
+ * emulations applied, or NULL when no known flag is set or the output
+ * buffer cannot be allocated.
+ */
+const struct tgsi_token *
+tgsi_emulate(const struct tgsi_token *tokens, unsigned flags)
+{
+   /* Flags handled by the instruction / declaration callbacks. */
+   const unsigned instr_flags = TGSI_EMU_CLAMP_COLOR_OUTPUTS |
+                                TGSI_EMU_PASSTHROUGH_EDGEFLAG;
+   const unsigned decl_flags = TGSI_EMU_FORCE_PERSAMPLE_INTERP;
+   struct tgsi_emulation_context ectx;
+   struct tgsi_token *result;
+   int result_len;
+
+   /* Nothing to emulate: no copy is made. */
+   if ((flags & (instr_flags | decl_flags)) == 0)
+      return NULL;
+
+   memset(&ectx, 0, sizeof(ectx));
+   ectx.flags = flags;
+   tgsi_scan_shader(tokens, &ectx.info);
+
+   /* Install only the callbacks the requested emulations need. */
+   if (flags & decl_flags)
+      ectx.base.transform_declaration = transform_decl;
+
+   if (flags & instr_flags)
+      ectx.base.transform_instruction = transform_instr;
+
+   /* Leave some room for the tokens the callbacks add. */
+   result_len = tgsi_num_tokens(tokens) + 20;
+   result = tgsi_alloc_tokens(result_len);
+   if (result == NULL)
+      return NULL;
+
+   tgsi_transform_shader(tokens, result, result_len, &ectx.base);
+   return result;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_emulate.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_emulate.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef TGSI_GL_EMULATION_H_
+#define TGSI_GL_EMULATION_H_
+
+#include "pipe/p_shader_tokens.h"
+
+/* Flags for tgsi_emulate(); any combination may be OR'ed together. */
+
+/* Mark instructions writing COLOR/BCOLOR outputs as saturating. */
+#define TGSI_EMU_CLAMP_COLOR_OUTPUTS      (1 << 0)
+/* Add an input and an EDGEFLAG output, and copy the former to the latter. */
+#define TGSI_EMU_PASSTHROUGH_EDGEFLAG     (1 << 1)
+/* Set the interpolation location of every input to per-sample. */
+#define TGSI_EMU_FORCE_PERSAMPLE_INTERP   (1 << 2)
+
+/**
+ * Apply the emulations selected by \p flags to a shader.
+ *
+ * \param tokens  the shader to transform
+ * \param flags   bitmask of TGSI_EMU_* values
+ * \return a newly allocated, transformed token array; NULL if none of the
+ *         TGSI_EMU_* flags is set or if the allocation fails.
+ *         NOTE(review): presumably the caller frees the result -- confirm.
+ */
+const struct tgsi_token *
+tgsi_emulate(const struct tgsi_token *tokens, unsigned flags);
+
+#endif /* TGSI_GL_EMULATION_H_ */
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
 * execute a texture instruction.
 *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
 * instruction variants like proj, lod, and texture with lod bias.
 * sampler indicates which src register the sampler is contained in.
 */
@@ -2032,7 +2032,7 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
   const union tgsi_exec_channel *args[5], *proj = NULL;
   union tgsi_exec_channel r[5];
-   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
   uint chan;
   uint unit;
   int8_t offsets[3];
@@ -2078,11 +2078,11 @@ exec_tex(struct tgsi_exec_machine *mach,
         args[i] = &ZeroVec;

      if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
      else if (modifier == TEX_MODIFIER_LOD_BIAS)
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
      else if (modifier == TEX_MODIFIER_GATHER)
-         control = tgsi_sampler_gather;
+         control = TGSI_SAMPLER_GATHER;
   }
   else {
      for (i = dim; i < Elements(args); i++)
@@ -2132,6 +2132,46 @@ exec_tex(struct tgsi_exec_machine *mach,
   }
 }

+/*
+ * execute a LODQ instruction.
+ *
+ * The sampler's query_lod hook is called with the texture coordinates from
+ * src[0] (unused coordinates are zero); its "mipmap" result is stored to
+ * dst.x and its "lod" result to dst.y, subject to the write mask.
+ */
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel mipmap, lod;
+   const uint writemask = inst->Dst[0].Register.WriteMask;
+   const uint unit = fetch_sampler_unit(mach, inst, 1);
+   const int dim =
+      tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   int c;
+
+   assert(dim <= Elements(coords));
+
+   /* fetch the coordinates the texture target actually uses */
+   for (c = 0; c < dim; c++) {
+      FETCH(&coords[c], 0, TGSI_CHAN_X + c);
+      args[c] = &coords[c];
+   }
+   /* the remaining ones read as zero */
+   for (c = dim; c < Elements(coords); c++) {
+      args[c] = &ZeroVec;
+   }
+
+   /* the same unit is used as sampler view index and sampler index */
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+                            args[0]->f,
+                            args[1]->f,
+                            args[2]->f,
+                            args[3]->f,
+                            TGSI_SAMPLER_LOD_NONE,
+                            mipmap.f,
+                            lod.f);
+
+   if (writemask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &mipmap, &inst->Dst[0], inst, TGSI_CHAN_X,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+   if (writemask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &lod, &inst->Dst[0], inst, TGSI_CHAN_Y,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}

 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -2155,7 +2195,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2171,7 +2211,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2185,7 +2225,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2205,7 +2245,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2225,7 +2265,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2364,7 +2404,7 @@ exec_sample(struct tgsi_exec_machine *mach,
   const uint sampler_unit = inst->Src[2].Register.Index;
   union tgsi_exec_channel r[5], c1;
   const union tgsi_exec_channel *lod = &ZeroVec;
-   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
   uint chan;
   unsigned char swizzles[4];
   int8_t offsets[3];
@@ -2378,16 +2418,16 @@ exec_sample(struct tgsi_exec_machine *mach,
      if (modifier == TEX_MODIFIER_LOD_BIAS) {
         FETCH(&c1, 3, TGSI_CHAN_X);
         lod = &c1;
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
      }
      else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
         FETCH(&c1, 3, TGSI_CHAN_X);
         lod = &c1;
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
      }
      else {
         assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
-         control = tgsi_sampler_lod_zero;
+         control = TGSI_SAMPLER_LOD_ZERO;
      }
   }

@@ -2513,7 +2553,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2529,7 +2569,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2547,7 +2587,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);
      break;

@@ -4378,6 +4418,12 @@ exec_instruction(
      exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
      break;

+   case TGSI_OPCODE_LODQ:
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_lodq(mach, inst);
+      break;
+
   case TGSI_OPCODE_UP2H:
      assert (0);
      break;
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -88,13 +88,14 @@ struct tgsi_interp_coef
   float dady[TGSI_NUM_CHANNELS];
 };

-enum tgsi_sampler_control {
-   tgsi_sampler_lod_none,
-   tgsi_sampler_lod_bias,
-   tgsi_sampler_lod_explicit,
-   tgsi_sampler_lod_zero,
-   tgsi_sampler_derivs_explicit,
-   tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+   TGSI_SAMPLER_LOD_NONE,
+   TGSI_SAMPLER_LOD_BIAS,
+   TGSI_SAMPLER_LOD_EXPLICIT,
+   TGSI_SAMPLER_LOD_ZERO,
+   TGSI_SAMPLER_DERIVS_EXPLICIT,
+   TGSI_SAMPLER_GATHER,
 };

 /**
@@ -138,6 +139,16 @@ struct tgsi_sampler
                     const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
                     const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
+                     const unsigned sview_index,
+                     const unsigned sampler_index,
+                     const float s[TGSI_QUAD_SIZE],
+                     const float t[TGSI_QUAD_SIZE],
+                     const float p[TGSI_QUAD_SIZE],
+                     const float c0[TGSI_QUAD_SIZE],
+                     const enum tgsi_sampler_control control,
+                     float mipmap[TGSI_QUAD_SIZE],
+                     float lod[TGSI_QUAD_SIZE]);
 };

 #define TGSI_EXEC_NUM_TEMPS       4096
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
   { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
   { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
   { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 },     /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
   { 0, 0, 0, 0, 0, 0, NONE, "", 105 },     /* removed */
   { 0, 0, 0, 0, 0, 0, NONE, "", 106 },     /* removed */
   { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
   case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMAD:
--- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the geometry shader to emulate point sprite by
+ * drawing a quad. It also adds an extra output for the original point position
+ * if the point position is to be written to a stream output buffer.
+ * Note: It assumes the driver will add a constant for the inverse viewport
+ *       after the user defined constants.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_point_sprite.h"
+#include "tgsi_transform.h"
+#include "pipe/p_state.h"
+
+/* Sentinel stored in register-index fields that hold no valid register. */
+#define INVALID_INDEX 9999
+
+/**
+ * Pack four selectors into one swizzle value, two bits per component
+ * (x in bits 0-1, y in bits 2-3, z in bits 4-5, w in bits 6-7).
+ *
+ * Each argument is -1, 0 or 1 and picks the component of the point
+ * immediate (0, 1, 0.5, -1) that holds that value:
+ * -1 -> W, 0 -> X, 1 -> Y.
+ */
+static inline unsigned
+set_swizzle(int x, int y, int z, int w)
+{
+   /* Indexed by (value + 1). */
+   static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
+                                   TGSI_SWIZZLE_Y};
+   unsigned packed;
+
+   assert(x >= -1 && x <= 1);
+   assert(y >= -1 && y <= 1);
+   assert(z >= -1 && z <= 1);
+   assert(w >= -1 && w <= 1);
+
+   packed  = map[x + 1];
+   packed |= map[y + 1] << 2;
+   packed |= map[z + 1] << 4;
+   packed |= map[w + 1] << 6;
+
+   return packed;
+}
+
+/* Extract the 2-bit selector of one component (0..3) from a packed swizzle. */
+static inline unsigned
+get_swizzle(unsigned swizzle, unsigned component)
+{
+   const unsigned shift = 2 * component;
+
+   assert(component < 4);
+   return (swizzle >> shift) & 0x3;
+}
+
+/**
+ * State of the point sprite transform.
+ *
+ * The embedded base context must stay the first member: the callbacks are
+ * handed a pointer to it and cast that pointer back to this struct.
+ * Register-index fields use INVALID_INDEX when no register is assigned.
+ */
+struct psprite_transform_context
+{
+   struct tgsi_transform_context base;
+   unsigned num_tmp;                // number of temporaries in use
+   unsigned num_out;                // number of outputs in use
+   unsigned num_orig_out;           // num_out before the prolog adds outputs
+   unsigned num_const;              // number of constants in use
+   unsigned num_imm;                // number of immediates emitted so far
+   unsigned point_size_in;          // point size input
+   unsigned point_size_out;         // point size output
+   unsigned point_size_tmp;         // point size temp
+   unsigned point_pos_in;           // point pos input
+   unsigned point_pos_out;          // point pos output
+   unsigned point_pos_sout;         // original point pos for streamout
+   unsigned point_pos_tmp;          // point pos temp
+   unsigned point_scale_tmp;        // point scale temp
+   unsigned point_color_out;        // point color output
+   unsigned point_color_tmp;        // point color temp
+   unsigned point_imm;              // point immediates
+   unsigned point_ivp;              // point inverseViewport constant
+   unsigned point_dir_swz[4];       // point direction swizzle
+   unsigned point_coord_swz[4];     // point coord swizzle
+   unsigned point_coord_enable;     // point coord enable mask
+   unsigned point_coord_decl;       // point coord output declared mask
+   unsigned point_coord_out;        // point coord output starting index
+   unsigned point_coord_aa;         // aa point coord semantic index
+   unsigned point_coord_k;          // aa point coord threshold distance
+   unsigned stream_out_point_pos:1; // set if to stream out original point pos
+   unsigned aa_point:1;             // set if doing aa point
+   // temp register standing in for each original output
+   unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS];
+   int max_generic;                 // highest GENERIC output semantic index seen
+};
+
+/* Recover the point sprite context from a pointer to its embedded base. */
+static inline struct psprite_transform_context *
+psprite_transform_context(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *pctx =
+      (struct psprite_transform_context *) ctx;
+
+   return pctx;
+}
+
+
+/**
+ * TGSI declaration transform callback.
+ *
+ * Records, from the declarations of the incoming shader:
+ *  - the register index of the point size and position inputs/outputs,
+ *  - which GENERIC outputs with semantic index below 32 are declared, and
+ *    the highest such index,
+ *  - the number of outputs, temporaries and constants in use.
+ * The declaration itself is forwarded unmodified.
+ */
+static void
+psprite_decl(struct tgsi_transform_context *ctx,
+             struct tgsi_full_declaration *decl)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_in = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_in = decl->Range.First;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+      if (decl->Semantic.Name == TGSI_SEMANTIC_PSIZE) {
+         ts->point_size_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+         ts->point_pos_out = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+               decl->Semantic.Index < 32) {
+         /* Use an unsigned constant: shifting a signed 1 into bit 31
+          * (semantic index 31) is undefined behavior.
+          */
+         ts->point_coord_decl |= 1u << decl->Semantic.Index;
+         ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+      }
+      /* One past the highest output register declared so far. */
+      ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+   else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
+      ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ *
+ * Forwards the immediate unchanged and counts it, so that the prolog knows
+ * the index at which its own immediate will land.
+ */
+static void
+psprite_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct psprite_transform_context *pctx = psprite_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   pctx->num_imm += 1;
+}
+
+
+/**
+ * TGSI transform prolog callback.
+ *
+ * Using the counts gathered by the declaration and immediate callbacks,
+ * this assigns a temporary to every original output, assigns the extra
+ * temporaries, outputs, immediate and constant the point sprite emulation
+ * needs, and emits their declarations.  If the shader has no point size
+ * output, it also emits a MOV that loads the point size temporary from the
+ * .z component of the point constant.
+ *
+ * The order of the num_tmp++ / num_out++ / num_const++ statements fixes
+ * the register numbering, so the statement order matters.
+ */
+static void
+psprite_prolog(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   unsigned point_coord_enable, en;
+   int i;
+
+   /* Replace output registers with temporary registers */
+   for (i = 0; i < ts->num_out; i++) {
+      ts->out_tmp_index[i] = ts->num_tmp++;
+   }
+   /* Remember how many outputs the shader had before any are added below */
+   ts->num_orig_out = ts->num_out;
+
+   /* Declare a tmp register for point scale */
+   ts->point_scale_tmp = ts->num_tmp++;
+
+   /* Reuse the temp standing in for the point size output if there is one,
+    * otherwise take a fresh temp.
+    */
+   if (ts->point_size_out != INVALID_INDEX)
+      ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
+   else
+      ts->point_size_tmp = ts->num_tmp++;
+
+   /* The position output is mandatory.  Its temp is tracked separately and
+    * its out_tmp_index entry is invalidated so that the generic
+    * "copy temp to output" loop skips it.
+    */
+   assert(ts->point_pos_out != INVALID_INDEX);
+   ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
+   ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
+
+   /* Declare one more tmp register for point coord threshold distance
+    * if we are generating anti-aliased point.
+    */
+   if (ts->aa_point)
+      ts->point_coord_k = ts->num_tmp++;
+
+   /* NOTE(review): the declared range starts at point_size_tmp.  When the
+    * shader has a point size output that is not output 0, the temps
+    * assigned to lower-numbered outputs appear to fall outside this range
+    * -- confirm whether that is intended.
+    */
+   tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1);
+
+   /* Declare an extra output for the original point position for stream out */
+   if (ts->stream_out_point_pos) {
+      ts->point_pos_sout = ts->num_out++;
+      tgsi_transform_output_decl(ctx, ts->point_pos_sout,
+                                 TGSI_SEMANTIC_GENERIC, 0, 0);
+   }
+
+   /* point coord outputs to be declared */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* Declare outputs for those point coord that are enabled but are not
+    * already declared in this shader.
+    */
+   ts->point_coord_out = ts->num_out;
+   if (point_coord_enable) {
+      /* i is the bit position, i.e. the GENERIC semantic index */
+      for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
+         if (en & 0x1) {
+            tgsi_transform_output_decl(ctx, ts->num_out++,
+                                       TGSI_SEMANTIC_GENERIC, i, 0);
+            ts->max_generic = MAX2(ts->max_generic, i);
+         }
+      }
+   }
+
+   /* add an extra generic output for aa point texcoord */
+   if (ts->aa_point) {
+      /* NOTE(review): assumes max_generic + 1 is a valid shift count for
+       * an int -- TODO confirm.
+       */
+      ts->point_coord_aa = ts->max_generic + 1;
+      assert((ts->point_coord_enable & (1 << ts->point_coord_aa)) == 0);
+      ts->point_coord_enable |= 1 << (ts->point_coord_aa);
+      tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
+                                 ts->point_coord_aa, 0);
+   }
+
+   /* Declare extra immediates */
+   ts->point_imm = ts->num_imm;
+   tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
+
+   /* Declare point constant -
+    * constant.xy -- inverseViewport
+    * constant.z -- current point size
+    * constant.w -- max point size
+    * The driver needs to add this constant to the constant buffer
+    */
+   ts->point_ivp = ts->num_const++;
+   tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
+
+   /* If this geometry shader does not specify point size,
+    * get the current point size from the point constant.
+    */
+   if (ts->point_size_out == INVALID_INDEX) {
+      struct tgsi_full_instruction inst;
+
+      /* MOV point_size_tmp, point_ivp.zzzz */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 1;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
+                             ts->point_ivp, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      ctx->emit_instruction(ctx, &inst);
+   }
+}
+
+
+/**
+ * Add the point sprite emulation instructions at the emit vertex instruction
+ */
+static void
+psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
+                         struct tgsi_full_instruction *vert_inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_instruction inst;
+   unsigned point_coord_enable, en;
+   unsigned i, j, s;
+
+   /* new point coord outputs */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* OUTPUT[pos_sout] = TEMP[pos] */
+   if (ts->point_pos_sout != INVALID_INDEX) {
+      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                              TGSI_FILE_OUTPUT, ts->point_pos_sout,
+                              TGSI_WRITEMASK_XYZW,
+                              TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
+   }
+
+   /**
+    * Set up the point scale vector
+    * scale = pointSize * pos.w * inverseViewport
+    */
+
+   /* MUL point_scale.x, point_size.x, point_pos.w */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+                  TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
+
+   /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+   inst.Instruction.NumDstRegs = 1;
+   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_WRITEMASK_XY);
+   inst.Instruction.NumSrcRegs = 2;
+   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
+                          ts->point_ivp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+   ctx->emit_instruction(ctx, &inst);
+
+   /**
+    * Set up the point coord threshold distance
+    * k = 0.5 - 1 / pointsize
+    */
+   if (ts->aa_point) {
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Y,
+                                  TGSI_FILE_TEMPORARY, ts->point_size_tmp,
+                                  TGSI_SWIZZLE_X);
+
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Z,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_SWIZZLE_X);
+   }
+
+
+   for (i = 0; i < 4; i++) {
+      unsigned point_dir_swz = ts->point_dir_swz[i];
+      unsigned point_coord_swz = ts->point_coord_swz[i];
+
+      /* All outputs need to be emitted for each vertex */
+      for (j = 0; j < ts->num_orig_out; j++) {
+         if (ts->out_tmp_index[j] != INVALID_INDEX) {
+            tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                                    TGSI_FILE_OUTPUT, j,
+                                    TGSI_WRITEMASK_XYZW,
+                                    TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
+         }
+      }
+
+      /* pos = point_scale * point_dir + point_pos */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
+                             TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 3;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
+                             TGSI_SWIZZLE_X);
+      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                             get_swizzle(point_dir_swz, 0),
+                             get_swizzle(point_dir_swz, 1),
+                             get_swizzle(point_dir_swz, 2),
+                             get_swizzle(point_dir_swz, 3));
+      tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_W);
+      ctx->emit_instruction(ctx, &inst);
+
+      /* point coord */
+      for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
+         unsigned dstReg;
+
+         if (en & 0x1) {
+            dstReg = ts->point_coord_out + j;
+
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+            inst.Instruction.NumDstRegs = 1;
+            tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
+                                   dstReg, TGSI_WRITEMASK_XYZW);
+            inst.Instruction.NumSrcRegs = 1;
+            tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                   get_swizzle(point_coord_swz, 0),
+                                   get_swizzle(point_coord_swz, 1),
+                                   get_swizzle(point_coord_swz, 2),
+                                   get_swizzle(point_coord_swz, 3));
+            ctx->emit_instruction(ctx, &inst);
+
+            /* MOV point_coord.z  point_coord_k.x */
+            if (s == ts->point_coord_aa) {
+               tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
+                                           TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
+                                           TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                           TGSI_SWIZZLE_X);
+            }
+            j++;  /* the next point coord output offset */
+         }
+      }
+
+      /* Emit the EMIT instruction for each vertex of the quad */
+      ctx->emit_instruction(ctx, vert_inst);
+   }
+
+   /* Emit the ENDPRIM instruction for the quad */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 1;
+   inst.Src[0] = vert_inst->Src[0];
+   ctx->emit_instruction(ctx, &inst);
+}
+
+
+/**
+ * Instruction callback of the point sprite transform.
+ */
+static void
+psprite_inst(struct tgsi_transform_context *ctx,
+             struct tgsi_full_instruction *inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_dst_register *dst = &inst->Dst[0];
+
+   /* EMIT is handed to psprite_emit_vertex_inst() rather than copied here */
+   if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
+      psprite_emit_vertex_inst(ctx, inst);
+      return;
+   }
+
+   /* Instructions that do not write an output register pass through as is */
+   if (dst->Register.File != TGSI_FILE_OUTPUT) {
+      ctx->emit_instruction(ctx, inst);
+      return;
+   }
+
+   if (dst->Register.Index == ts->point_size_out) {
+      /**
+       * The point size is written to a temporary instead of the output;
+       * that temporary is the source for the point scale computation.
+       */
+      dst->Register.File = TGSI_FILE_TEMPORARY;
+      dst->Register.Index = ts->point_size_tmp;
+      ctx->emit_instruction(ctx, inst);
+
+      /* Lower clamp: MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
+
+      /* Upper clamp: MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
+      return;
+   }
+
+   if (dst->Register.Index == ts->point_pos_out) {
+      /* The position is kept in its own temporary */
+      dst->Register.File = TGSI_FILE_TEMPORARY;
+      dst->Register.Index = ts->point_pos_tmp;
+   }
+   else {
+      /* Any other output is redirected to its out_tmp_index[] temporary */
+      dst->Register.File = TGSI_FILE_TEMPORARY;
+      dst->Register.Index = ts->out_tmp_index[dst->Register.Index];
+   }
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * Property callback of the point sprite transform.
+ * Every point becomes a 4-vertex triangle strip.
+ */
+static void
+psprite_property(struct tgsi_transform_context *ctx,
+                 struct tgsi_full_property *prop)
+{
+   const unsigned name = prop->Property.PropertyName;
+
+   if (name == TGSI_PROPERTY_GS_OUTPUT_PRIM) {
+      prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
+   }
+   else if (name == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+      /* four vertices are emitted for each original one */
+      prop->u[0].Data *= 4;
+   }
+
+   ctx->emit_property(ctx, prop);
+}
+
+/**
+ * TGSI utility to transform a geometry shader to support point sprite.
+ * Returns newly allocated tokens, or NULL if the token buffer allocation fails.
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index)
+{
+   struct psprite_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* setup transformation context: zero it, then install the callbacks */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = psprite_decl;
+   transform.base.transform_instruction = psprite_inst;
+   transform.base.transform_property = psprite_property;
+   transform.base.transform_immediate = psprite_immediate;
+   transform.base.prolog = psprite_prolog;
+
+   transform.point_size_in = INVALID_INDEX;
+   transform.point_size_out = INVALID_INDEX;
+   transform.point_size_tmp = INVALID_INDEX;
+   transform.point_pos_in = INVALID_INDEX;
+   transform.point_pos_out = INVALID_INDEX;
+   transform.point_pos_sout = INVALID_INDEX;
+   transform.point_pos_tmp = INVALID_INDEX;
+   transform.point_scale_tmp = INVALID_INDEX;
+   transform.point_imm = INVALID_INDEX;
+   transform.point_coord_aa = INVALID_INDEX;
+   transform.point_coord_k = INVALID_INDEX;
+
+   transform.stream_out_point_pos = stream_out_point_pos;
+   transform.point_coord_enable = point_coord_enable;
+   transform.aa_point = aa_point_coord_index != NULL; /* non-NULL enables AA */
+   transform.max_generic = -1;
+
+   /* point sprite directions based on the immediates (0, 1, 0.5, -1) */
+   /* (-1, -1, 0, 0) */
+   transform.point_dir_swz[0] = set_swizzle(-1, -1, 0, 0);
+   /* (-1, 1, 0, 0) */
+   transform.point_dir_swz[1] = set_swizzle(-1, 1, 0, 0);
+   /* (1, -1, 0, 0) */
+   transform.point_dir_swz[2] = set_swizzle(1, -1, 0, 0);
+   /* (1, 1, 0, 0) */
+   transform.point_dir_swz[3] = set_swizzle(1, 1, 0, 0);
+
+   /* point coord based on the same immediates (0, 1, 0.5, -1) */
+   if (sprite_origin_lower_left) {
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 0, 0, 1);
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 1, 0, 1);
+   }
+   else {
+      /* (0, 1, 0, 1) */
+      transform.point_coord_swz[0] = set_swizzle(0, 1, 0, 1);
+      /* (0, 0, 0, 1) */
+      transform.point_coord_swz[1] = set_swizzle(0, 0, 0, 1);
+      /* (1, 1, 0, 1) */
+      transform.point_coord_swz[2] = set_swizzle(1, 1, 0, 1);
+      /* (1, 0, 0, 1) */
+      transform.point_coord_swz[3] = set_swizzle(1, 0, 0, 1);
+   }
+
+   /* allocate new tokens buffer: input length plus num_new_tokens of headroom */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   if (aa_point_coord_index)
+      *aa_point_coord_index = transform.point_coord_aa;
+
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_POINT_SPRITE_H
+#define TGSI_POINT_SPRITE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index);
+
+#endif /* TGSI_POINT_SPRITE_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -56,6 +56,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
 {
   uint procType, i;
   struct tgsi_parse_context parse;
+   unsigned current_depth = 0;

   memset(info, 0, sizeof(*info));
   for (i = 0; i < TGSI_FILE_COUNT; i++)
@@ -100,6 +101,72 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
            info->opcode_count[fullinst->Instruction.Opcode]++;

+            switch (fullinst->Instruction.Opcode) {
+            case TGSI_OPCODE_IF:
+            case TGSI_OPCODE_UIF:
+            case TGSI_OPCODE_BGNLOOP:
+               current_depth++;
+               info->max_depth = MAX2(info->max_depth, current_depth);
+               break;
+            case TGSI_OPCODE_ENDIF:
+            case TGSI_OPCODE_ENDLOOP:
+               current_depth--;
+               break;
+            default:
+               break;
+            }
+
+            if (fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID ||
+                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
+                fullinst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
+               const struct tgsi_full_src_register *src0 = &fullinst->Src[0];
+               unsigned input;
+
+               if (src0->Register.Indirect && src0->Indirect.ArrayID)
+                  input = info->input_array_first[src0->Indirect.ArrayID];
+               else
+                  input = src0->Register.Index;
+
+               /* For the INTERP opcodes, the interpolation is always
+                * PERSPECTIVE unless LINEAR is specified.
+                */
+               switch (info->input_interpolate[input]) {
+               case TGSI_INTERPOLATE_COLOR:
+               case TGSI_INTERPOLATE_CONSTANT:
+               case TGSI_INTERPOLATE_PERSPECTIVE:
+                  switch (fullinst->Instruction.Opcode) {
+                  case TGSI_OPCODE_INTERP_CENTROID:
+                     info->uses_persp_opcode_interp_centroid = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_OFFSET:
+                     info->uses_persp_opcode_interp_offset = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_SAMPLE:
+                     info->uses_persp_opcode_interp_sample = true;
+                     break;
+                  }
+                  break;
+
+               case TGSI_INTERPOLATE_LINEAR:
+                  switch (fullinst->Instruction.Opcode) {
+                  case TGSI_OPCODE_INTERP_CENTROID:
+                     info->uses_linear_opcode_interp_centroid = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_OFFSET:
+                     info->uses_linear_opcode_interp_offset = true;
+                     break;
+                  case TGSI_OPCODE_INTERP_SAMPLE:
+                     info->uses_linear_opcode_interp_sample = true;
+                     break;
+                  }
+                  break;
+               }
+            }
+
+            if (fullinst->Instruction.Opcode >= TGSI_OPCODE_F2D &&
+                fullinst->Instruction.Opcode <= TGSI_OPCODE_DSSG)
+               info->uses_doubles = true;
+
            for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
               const struct tgsi_full_src_register *src =
                  &fullinst->Src[i];
@@ -216,8 +283,48 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
                  info->num_inputs++;

-                  if (fulldecl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
-                     info->uses_centroid = TRUE;
+                  /* Only interpolated varyings. Don't include POSITION.
+                   * Don't include integer varyings, because they are not
+                   * interpolated.
+                   */
+                  if (semName == TGSI_SEMANTIC_GENERIC ||
+                      semName == TGSI_SEMANTIC_TEXCOORD ||
+                      semName == TGSI_SEMANTIC_COLOR ||
+                      semName == TGSI_SEMANTIC_BCOLOR ||
+                      semName == TGSI_SEMANTIC_FOG ||
+                      semName == TGSI_SEMANTIC_CLIPDIST ||
+                      semName == TGSI_SEMANTIC_CULLDIST) {
+                     switch (fulldecl->Interp.Interpolate) {
+                     case TGSI_INTERPOLATE_COLOR:
+                     case TGSI_INTERPOLATE_PERSPECTIVE:
+                        switch (fulldecl->Interp.Location) {
+                        case TGSI_INTERPOLATE_LOC_CENTER:
+                           info->uses_persp_center = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_CENTROID:
+                           info->uses_persp_centroid = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_SAMPLE:
+                           info->uses_persp_sample = true;
+                           break;
+                        }
+                        break;
+                     case TGSI_INTERPOLATE_LINEAR:
+                        switch (fulldecl->Interp.Location) {
+                        case TGSI_INTERPOLATE_LOC_CENTER:
+                           info->uses_linear_center = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_CENTROID:
+                           info->uses_linear_centroid = true;
+                           break;
+                        case TGSI_INTERPOLATE_LOC_SAMPLE:
+                           info->uses_linear_sample = true;
+                           break;
+                        }
+                        break;
+                     /* TGSI_INTERPOLATE_CONSTANT doesn't do any interpolation. */
+                     }
+                  }

                  if (semName == TGSI_SEMANTIC_PRIMID)
                     info->uses_primid = TRUE;
@@ -302,6 +409,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                        info->writes_edgeflag = TRUE;
                     }
                  }
+               } else if (file == TGSI_FILE_SAMPLER) {
+                  info->samplers_declared |= 1 << reg;
               }
            }
         }
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -64,6 +64,7 @@ struct tgsi_shader_info
   uint file_count[TGSI_FILE_COUNT];  /**< number of declared registers */
   int file_max[TGSI_FILE_COUNT];  /**< highest index of declared registers */
   int const_file_max[PIPE_MAX_CONSTANT_BUFFERS];
+   unsigned samplers_declared; /**< bitmask of declared samplers */

   ubyte input_array_first[PIPE_MAX_SHADER_INPUTS];
   ubyte input_array_last[PIPE_MAX_SHADER_INPUTS];
@@ -82,7 +83,18 @@ struct tgsi_shader_info
   boolean writes_stencil; /**< does fragment shader write stencil value? */
   boolean writes_edgeflag; /**< vertex shader outputs edgeflag */
   boolean uses_kill;  /**< KILL or KILL_IF instruction used? */
-   boolean uses_centroid;
+   boolean uses_persp_center;
+   boolean uses_persp_centroid;
+   boolean uses_persp_sample;
+   boolean uses_linear_center;
+   boolean uses_linear_centroid;
+   boolean uses_linear_sample;
+   boolean uses_persp_opcode_interp_centroid;
+   boolean uses_persp_opcode_interp_offset;
+   boolean uses_persp_opcode_interp_sample;
+   boolean uses_linear_opcode_interp_centroid;
+   boolean uses_linear_opcode_interp_offset;
+   boolean uses_linear_opcode_interp_sample;
   boolean uses_instanceid;
   boolean uses_vertexid;
   boolean uses_vertexid_nobase;
@@ -95,7 +107,7 @@ struct tgsi_shader_info
   boolean writes_viewport_index;
   boolean writes_layer;
   boolean is_msaa_sampler[PIPE_MAX_SAMPLERS];
-
+   boolean uses_doubles; /**< uses any of the double instructions */
   unsigned clipdist_writemask;
   unsigned culldist_writemask;
   unsigned num_written_culldistance;
@@ -113,6 +125,11 @@ struct tgsi_shader_info
   unsigned indirect_files_written;

   unsigned properties[TGSI_PROPERTY_COUNT]; /* index with TGSI_PROPERTY_ */
+
+   /**
+    * Max nesting limit of loops/if's
+    */
+   unsigned max_depth;
 };

 extern void
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -95,19 +95,38 @@ struct tgsi_transform_context
 * Helper for emitting temporary register declarations.
 */
 static inline void
-tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
-                         unsigned index)
+tgsi_transform_temps_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
 {
   struct tgsi_full_declaration decl;

   decl = tgsi_default_full_declaration();
   decl.Declaration.File = TGSI_FILE_TEMPORARY;
-   decl.Range.First =
-   decl.Range.Last = index;
+   decl.Range.First = firstIdx;
+   decl.Range.Last = lastIdx;
   ctx->emit_declaration(ctx, &decl);
 }

+static inline void
+tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
+                         unsigned index)
+{
+   tgsi_transform_temps_decl(ctx, index, index); /* one register: [index, index] */
+}

+static inline void
+tgsi_transform_const_decl(struct tgsi_transform_context *ctx,
+                          unsigned firstIdx, unsigned lastIdx)
+{
+   struct tgsi_full_declaration decl;
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_CONSTANT;
+   decl.Range.First = firstIdx; /* CONST[firstIdx..lastIdx], both ends inclusive */
+   decl.Range.Last = lastIdx;
+   ctx->emit_declaration(ctx, &decl);
+}
+
 static inline void
 tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
                          unsigned index,
@@ -129,6 +148,26 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
   ctx->emit_declaration(ctx, &decl);
 }

+static inline void
+tgsi_transform_output_decl(struct tgsi_transform_context *ctx,
+                           unsigned index,
+                           unsigned sem_name, unsigned sem_index,
+                           unsigned interp)
+{
+   struct tgsi_full_declaration out = tgsi_default_full_declaration();
+
+   /* a single OUTPUT register carrying semantic and interpolation info */
+   out.Declaration.File = TGSI_FILE_OUTPUT;
+   out.Range.First = index;
+   out.Range.Last = index;
+   out.Declaration.Semantic = 1;
+   out.Semantic.Name = sem_name;
+   out.Semantic.Index = sem_index;
+   out.Declaration.Interpolate = 1;
+   out.Interp.Interpolate = interp;
+
+   ctx->emit_declaration(ctx, &out);
+}

 static inline void
 tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
@@ -182,6 +221,28 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
   ctx->emit_immediate(ctx, &immed);
 }

+static inline void
+tgsi_transform_dst_reg(struct tgsi_full_dst_register *reg,
+                       unsigned file, unsigned index, unsigned writemask)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.WriteMask = writemask; /* other fields of *reg are untouched */
+}
+
+static inline void
+tgsi_transform_src_reg(struct tgsi_full_src_register *reg,
+                       unsigned file, unsigned index,
+                       unsigned swizzleX, unsigned swizzleY,
+                       unsigned swizzleZ, unsigned swizzleW)
+{
+   reg->Register.File = file;
+   reg->Register.Index = index;
+   reg->Register.SwizzleX = swizzleX;
+   reg->Register.SwizzleY = swizzleY;
+   reg->Register.SwizzleZ = swizzleZ;
+   reg->Register.SwizzleW = swizzleW; /* other fields (e.g. Negate) are untouched */
+}

 /**
 * Helper for emitting 1-operand instructions.
@@ -399,7 +460,8 @@ static inline void
 tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
                         unsigned src_file,
                         unsigned src_index,
-                         unsigned src_swizzle)
+                         unsigned src_swizzle,
+                         boolean negate)
 {
   struct tgsi_full_instruction inst;

@@ -413,7 +475,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
   inst.Src[0].Register.SwizzleY =
   inst.Src[0].Register.SwizzleZ =
   inst.Src[0].Register.SwizzleW = src_swizzle;
-   inst.Src[0].Register.Negate = 1;
+   inst.Src[0].Register.Negate = negate;

   ctx->emit_instruction(ctx, &inst);
 }
--- a/src/gallium/auxiliary/tgsi/tgsi_two_side.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms fragment shaders to facilitate two-sided lighting.
+ *
+ * Basically, if the FS has any color inputs (TGSI_SEMANTIC_COLOR) we'll:
+ * 1. create corresponding back-color inputs (TGSI_SEMANTIC_BCOLOR)
+ * 2. use the FACE register to choose between front/back colors and put the
+ *    selected color in new temp regs.
+ * 3. replace reads of the original color inputs with the new temp regs.
+ *
+ * Then, the driver just needs to link the VS front/back output colors to
+ * the FS front/back input colors.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_two_side.h"
+#include "tgsi_transform.h"
+
+
+#define INVALID_INDEX 9999
+
+/** State kept while transforming one shader for two-sided lighting. */
+struct two_side_transform_context
+{
+   struct tgsi_transform_context base; /**< first member: contexts are cast to it */
+   uint num_temps;            /**< highest declared TEMP index + 1 */
+   uint num_inputs;           /**< next free INPUT index */
+   uint face_input;           /**< index of the FACE input */
+   uint front_color_input[2]; /**< INPUT regs */
+   uint front_color_interp[2];/**< TGSI_INTERPOLATE_x */
+   uint back_color_input[2];  /**< INPUT regs */
+   uint new_colors[2];        /**< TEMP regs */
+};
+
+/** Downcast from the embedded base context (the struct's first member). */
+static inline struct two_side_transform_context *
+two_side_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct two_side_transform_context *) ctx;
+}
+
+/** Declaration callback: note color/FACE inputs and register counts. */
+static void
+xform_decl(struct tgsi_transform_context *ctx,
+           struct tgsi_full_declaration *decl)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
+         ts->face_input = decl->Range.First;
+      }
+      else if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR) {
+         assert(decl->Semantic.Index < 2);
+         ts->front_color_input[decl->Semantic.Index] = decl->Range.First;
+         ts->front_color_interp[decl->Semantic.Index] = decl->Interp.Interpolate;
+      }
+      ts->num_inputs = MAX2(ts->num_inputs, decl->Range.Last + 1);
+      break;
+   case TGSI_FILE_TEMPORARY:
+      ts->num_temps = MAX2(ts->num_temps, decl->Range.Last + 1);
+      break;
+   }
+   ctx->emit_declaration(ctx, decl);
+}
+
+/** Prolog: declare BCOLOR/FACE inputs and temps, then pick colors by facing. */
+static void
+emit_prolog(struct tgsi_transform_context *ctx)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   struct tgsi_full_declaration decl;
+   struct tgsi_full_instruction inst;
+   uint num_colors = 0;
+   uint i;
+
+   /* Declare 0, 1 or 2 new BCOLOR inputs */
+   for (i = 0; i < 2; i++) {
+      if (ts->front_color_input[i] != INVALID_INDEX) {
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Interpolate = 1;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
+         decl.Semantic.Index = i;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         decl.Interp.Interpolate = ts->front_color_interp[i];
+         ctx->emit_declaration(ctx, &decl);
+         ts->back_color_input[i] = decl.Range.First;
+         num_colors++;
+      }
+   }
+
+   if (num_colors > 0) {
+      /* Declare 1 or 2 temp registers */
+      decl = tgsi_default_full_declaration();
+      decl.Declaration.File = TGSI_FILE_TEMPORARY;
+      decl.Range.First = ts->num_temps;
+      decl.Range.Last = ts->num_temps + num_colors - 1;
+      ctx->emit_declaration(ctx, &decl);
+      /* COLOR1 uses the last declared temp so it is valid even without COLOR0 */
+      ts->new_colors[0] = ts->num_temps;
+      ts->new_colors[1] = ts->num_temps + num_colors - 1;
+
+      if (ts->face_input == INVALID_INDEX) {
+         /* declare FACE INPUT register */
+         decl = tgsi_default_full_declaration();
+         decl.Declaration.File = TGSI_FILE_INPUT;
+         decl.Declaration.Semantic = 1;
+         decl.Semantic.Name = TGSI_SEMANTIC_FACE;
+         decl.Semantic.Index = 0;
+         decl.Range.First = decl.Range.Last = ts->num_inputs++;
+         ctx->emit_declaration(ctx, &decl);
+         ts->face_input = decl.Range.First;
+      }
+
+      /* CMP temp[c0], face, bcolor[c0], fcolor[c0]
+       * i.e. temp[c0] = face < 0.0 ? bcolor[c0] : fcolor[c0] */
+      for (i = 0; i < 2; i++) {
+         if (ts->front_color_input[i] != INVALID_INDEX) {
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_CMP;
+            inst.Instruction.NumDstRegs = 1;
+            inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+            inst.Dst[0].Register.Index = ts->new_colors[i];
+            inst.Instruction.NumSrcRegs = 3;
+            inst.Src[0].Register.File = TGSI_FILE_INPUT;
+            inst.Src[0].Register.Index = ts->face_input;
+            inst.Src[1].Register.File = TGSI_FILE_INPUT;
+            inst.Src[1].Register.Index = ts->back_color_input[i];
+            inst.Src[2].Register.File = TGSI_FILE_INPUT;
+            inst.Src[2].Register.Index = ts->front_color_input[i];
+
+            ctx->emit_instruction(ctx, &inst);
+         }
+      }
+   }
+}
+
+/** Instruction callback: redirect front-color input reads to the temps. */
+static void
+xform_inst(struct tgsi_transform_context *ctx,
+           struct tgsi_full_instruction *inst)
+{
+   struct two_side_transform_context *ts = two_side_transform_context(ctx);
+   const struct tgsi_opcode_info *info =
+      tgsi_get_opcode_info(inst->Instruction.Opcode);
+   uint src, color;
+
+   for (src = 0; src < info->num_src; src++) {
+      struct tgsi_full_src_register *reg = &inst->Src[src];
+
+      /* only reads from the INPUT file can refer to a front color */
+      if (reg->Register.File != TGSI_FILE_INPUT)
+         continue;
+
+      for (color = 0; color < 2; color++) {
+         if (reg->Register.Index == ts->front_color_input[color]) {
+            /* read the face-selected color from its temp instead */
+            reg->Register.File = TGSI_FILE_TEMPORARY;
+            reg->Register.Index = ts->new_colors[color];
+            break;
+         }
+      }
+   }
+   ctx->emit_instruction(ctx, inst);
+}
+
+/** Build a two-sided-lighting version of tokens_in; NULL if allocation fails. */
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in)
+{
+   struct two_side_transform_context transform;
+   const uint num_new_tokens = 100; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+   uint i;
+
+   /* start from an all-zero context, then hook up the callbacks */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.prolog = emit_prolog;
+   transform.base.transform_declaration = xform_decl;
+   transform.base.transform_instruction = xform_inst;
+
+   /* no FACE or color register has been seen yet */
+   transform.face_input = INVALID_INDEX;
+   for (i = 0; i < 2; i++) {
+      transform.front_color_input[i] = INVALID_INDEX;
+      transform.front_color_interp[i] = TGSI_INTERPOLATE_COLOR;
+      transform.back_color_input[i] = INVALID_INDEX;
+   }
+
+   /* output buffer: input size plus room for the added tokens */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_two_side.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_two_side.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2013 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_TWO_SIDE_H
+#define TGSI_TWO_SIDE_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_two_side(const struct tgsi_token *tokens_in);
+
+#endif /* TGSI_TWO_SIDE_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -462,3 +462,21 @@ tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)

   return dim;
 }
+
+
+/**
+ * Tell whether a TGSI texture target is one of the SHADOW targets.
+ *
+ * \param target  a TGSI_TEXTURE_x value
+ * \return TRUE for the shadow 1D/2D/RECT/CUBE targets and their array
+ *         forms, FALSE for everything else
+ */
+boolean
+tgsi_is_shadow_target(unsigned target)
+{
+   boolean is_shadow = FALSE;
+
+   if (target == TGSI_TEXTURE_SHADOW1D ||
+       target == TGSI_TEXTURE_SHADOW2D ||
+       target == TGSI_TEXTURE_SHADOWRECT ||
+       target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
+       target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
+       target == TGSI_TEXTURE_SHADOWCUBE ||
+       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
+      is_shadow = TRUE;
+   }
+
+   return is_shadow;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -82,6 +82,9 @@ tgsi_util_get_src_from_ind(const struct tgsi_ind_register *reg);
 int
 tgsi_util_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample);

+boolean
+tgsi_is_shadow_target(unsigned target);
+
 #if defined __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,

   u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      return;
   u_upload_unmap(ctx->upload);

   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
@@ -2063,7 +2065,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
   struct pipe_context *pipe = ctx->base.pipe;
   struct pipe_vertex_buffer vb = {0};
-   struct pipe_stream_output_target *so_target;
+   struct pipe_stream_output_target *so_target = NULL;
   unsigned offsets[PIPE_MAX_SO_BUFFERS] = {0};

   assert(num_channels >= 1);
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
                 &vb.buffer_offset, &vb.buffer);
+   if (!vb.buffer)
+      goto out;
+
   vb.stride = 0;

   blitter_set_running_flag(ctx);
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,

   util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);

+out:
   blitter_restore_vertex_states(ctx);
   blitter_restore_render_cond(ctx);
   blitter_unset_running_flag(ctx);
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -372,30 +372,28 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
 *
 * States not listed here are not affected by util_blitter. */

-static inline
-void util_blitter_save_blend(struct blitter_context *blitter,
-                             void *state)
+static inline void
+util_blitter_save_blend(struct blitter_context *blitter, void *state)
 {
   blitter->saved_blend_state = state;
 }

-static inline
-void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
-                                           void *state)
+static inline void
+util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
+                                      void *state)
 {
   blitter->saved_dsa_state = state;
 }

-static inline
-void util_blitter_save_vertex_elements(struct blitter_context *blitter,
-                                       void *state)
+static inline void
+util_blitter_save_vertex_elements(struct blitter_context *blitter, void *state)
 {
   blitter->saved_velem_state = state;
 }

-static inline
-void util_blitter_save_stencil_ref(struct blitter_context *blitter,
-                                   const struct pipe_stencil_ref *state)
+static inline void
+util_blitter_save_stencil_ref(struct blitter_context *blitter,
+                              const struct pipe_stencil_ref *state)
 {
   blitter->saved_stencil_ref = *state;
 }
@@ -407,23 +405,20 @@ void util_blitter_save_rasterizer(struct blitter_context *blitter,
   blitter->saved_rs_state = state;
 }

-static inline
-void util_blitter_save_fragment_shader(struct blitter_context *blitter,
-                                       void *fs)
+static inline void
+util_blitter_save_fragment_shader(struct blitter_context *blitter, void *fs)
 {
   blitter->saved_fs = fs;
 }

-static inline
-void util_blitter_save_vertex_shader(struct blitter_context *blitter,
-                                     void *vs)
+static inline void
+util_blitter_save_vertex_shader(struct blitter_context *blitter, void *vs)
 {
   blitter->saved_vs = vs;
 }

-static inline
-void util_blitter_save_geometry_shader(struct blitter_context *blitter,
-                                       void *gs)
+static inline void
+util_blitter_save_geometry_shader(struct blitter_context *blitter, void *gs)
 {
   blitter->saved_gs = gs;
 }
@@ -442,24 +437,24 @@ util_blitter_save_tesseval_shader(struct blitter_context *blitter,
   blitter->saved_tes = sh;
 }

-static inline
-void util_blitter_save_framebuffer(struct blitter_context *blitter,
-                                   const struct pipe_framebuffer_state *state)
+static inline void
+util_blitter_save_framebuffer(struct blitter_context *blitter,
+                              const struct pipe_framebuffer_state *state)
 {
   blitter->saved_fb_state.nr_cbufs = 0; /* It's ~0 now, meaning it's unsaved. */
   util_copy_framebuffer_state(&blitter->saved_fb_state, state);
 }

-static inline
-void util_blitter_save_viewport(struct blitter_context *blitter,
-                                struct pipe_viewport_state *state)
+static inline void
+util_blitter_save_viewport(struct blitter_context *blitter,
+                           struct pipe_viewport_state *state)
 {
   blitter->saved_viewport = *state;
 }

-static inline
-void util_blitter_save_scissor(struct blitter_context *blitter,
-                               struct pipe_scissor_state *state)
+static inline void
+util_blitter_save_scissor(struct blitter_context *blitter,
+                          struct pipe_scissor_state *state)
 {
   blitter->saved_scissor = *state;
 }
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -41,6 +41,7 @@
 #include "util/u_tile.h" 
 #include "util/u_prim.h"
 #include "util/u_surface.h"
+#include <inttypes.h>

 #include <stdio.h>
 #include <limits.h> /* CHAR_BIT */
@@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
      for (; flags->name; ++flags)
         namealign = MAX2(namealign, strlen(flags->name));
      for (flags = orig; flags->name; ++flags)
-         _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
+         _debug_printf("| %*s [0x%0*" PRIx64 "]%s%s\n", namealign, flags->name,
                      (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
                      flags->desc ? " " : "", flags->desc ? flags->desc : "");
   }
@@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,

   if (debug_get_option_should_print()) {
      if (str) {
-         debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+         debug_printf("%s: %s = 0x%" PRIx64 " (%s)\n", __FUNCTION__, name, result, str);
      } else {
-         debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+         debug_printf("%s: %s = 0x%" PRIx64 "\n", __FUNCTION__, name, result);
      }
   }

--- a/src/gallium/auxiliary/util/u_format_rgb9e5.h
+++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h
@@ -21,7 +21,8 @@
 * DEALINGS IN THE SOFTWARE.
 */

-/* Copied from EXT_texture_shared_exponent and edited. */
+/* Copied from EXT_texture_shared_exponent and edited, getting rid of
+ * expensive float math bits too. */

 #ifndef RGB9E5_H
 #define RGB9E5_H
@@ -39,7 +40,6 @@
 #define RGB9E5_MANTISSA_VALUES       (1<<RGB9E5_MANTISSA_BITS)
 #define MAX_RGB9E5_MANTISSA          (RGB9E5_MANTISSA_VALUES-1)
 #define MAX_RGB9E5                   (((float)MAX_RGB9E5_MANTISSA)/RGB9E5_MANTISSA_VALUES * (1<<MAX_RGB9E5_EXP))
-#define EPSILON_RGB9E5               ((1.0/RGB9E5_MANTISSA_VALUES) / (1<<RGB9E5_EXP_BIAS))

 typedef union {
   unsigned int raw;
@@ -74,63 +74,59 @@ typedef union {
   } field;
 } rgb9e5;

-static inline float rgb9e5_ClampRange(float x)
-{
-   if (x > 0.0f) {
-      if (x >= MAX_RGB9E5) {
-         return MAX_RGB9E5;
-      } else {
-         return x;
-      }
-   } else {
-      /* NaN gets here too since comparisons with NaN always fail! */
-      return 0.0;
-   }
-}

-/* Ok, FloorLog2 is not correct for the denorm and zero values, but we
-   are going to do a max of this value with the minimum rgb9e5 exponent
-   that will hide these problem cases. */
-static inline int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_ClampRange(float x)
 {
   float754 f;
-
+   float754 max;
   f.value = x;
-   return (f.field.biasedexponent - 127);
+   max.value = MAX_RGB9E5;
+
+   if (f.raw > 0x7f800000)
+  /* catches neg, NaNs */
+      return 0;
+   else if (f.raw >= max.raw)
+      return max.raw;
+   else
+      return f.raw;
 }

 static inline unsigned float3_to_rgb9e5(const float rgb[3])
 {
   rgb9e5 retval;
-   float maxrgb;
-   int rm, gm, bm;
-   float rc, gc, bc;
-   int exp_shared, maxm;
-   double denom;
+   int rm, gm, bm, exp_shared;
+   float754 revdenom = {0};
+   float754 rc, bc, gc, maxrgb;

-   rc = rgb9e5_ClampRange(rgb[0]);
-   gc = rgb9e5_ClampRange(rgb[1]);
-   bc = rgb9e5_ClampRange(rgb[2]);
+   rc.raw = rgb9e5_ClampRange(rgb[0]);
+   gc.raw = rgb9e5_ClampRange(rgb[1]);
+   bc.raw = rgb9e5_ClampRange(rgb[2]);
+   maxrgb.raw = MAX3(rc.raw, gc.raw, bc.raw);

-   maxrgb = MAX3(rc, gc, bc);
-   exp_shared = MAX2(-RGB9E5_EXP_BIAS-1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
+   /*
+    * Compared to what the spec suggests, instead of conditionally adjusting
+    * the exponent after the fact do it here by doing the equivalent of +0.5 -
+    * the int add will spill over into the exponent in this case.
+    */
+   maxrgb.raw += maxrgb.raw & (1 << (23-9));
+   exp_shared = MAX2((maxrgb.raw >> 23), -RGB9E5_EXP_BIAS - 1 + 127) +
+                1 + RGB9E5_EXP_BIAS - 127;
+   revdenom.field.biasedexponent = 127 - (exp_shared - RGB9E5_EXP_BIAS -
+                                          RGB9E5_MANTISSA_BITS) + 1;
   assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   assert(exp_shared >= 0);
-   /* This exp2 function could be replaced by a table. */
-   denom = exp2(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);

-   maxm = (int) floor(maxrgb / denom + 0.5);
-   if (maxm == MAX_RGB9E5_MANTISSA+1) {
-      denom *= 2;
-      exp_shared += 1;
-      assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
-   } else {
-      assert(maxm <= MAX_RGB9E5_MANTISSA);
-   }
-
-   rm = (int) floor(rc / denom + 0.5);
-   gm = (int) floor(gc / denom + 0.5);
-   bm = (int) floor(bc / denom + 0.5);
+   /*
+    * The spec uses strict round-up behavior (d3d10 disagrees, but in any case
+    * must match what is done above for figuring out exponent).
+    * We avoid the doubles ((int) rc * revdenom + 0.5) by doing the rounding
+    * ourselves (revdenom was adjusted by +1, above).
+    */
+   rm = (int) (rc.value * revdenom.value);
+   gm = (int) (gc.value * revdenom.value);
+   bm = (int) (bc.value * revdenom.value);
+   rm = (rm & 1) + (rm >> 1);
+   gm = (gm & 1) + (gm >> 1);
+   bm = (bm & 1) + (bm >> 1);

   assert(rm <= MAX_RGB9E5_MANTISSA);
   assert(gm <= MAX_RGB9E5_MANTISSA);
@@ -151,15 +147,15 @@ static inline void rgb9e5_to_float3(unsigned rgb, float retval[3])
 {
   rgb9e5 v;
   int exponent;
-   float scale;
+   float754 scale = {0};

   v.raw = rgb;
   exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS;
-   scale = exp2f(exponent);
+   scale.field.biasedexponent = exponent + 127;

-   retval[0] = v.field.r * scale;
-   retval[1] = v.field.g * scale;
-   retval[2] = v.field.b * scale;
+   retval[0] = v.field.r * scale.value;
+   retval[1] = v.field.g * scale.value;
+   retval[2] = v.field.b * scale.value;
 }

 #endif
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -88,3 +88,18 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,

   *dst_count = util_last_bit(enabled_buffers);
 }
+
+
+/**
+ * Copy an index buffer binding into \p dst, keeping the resource
+ * reference held through dst->buffer up to date.
+ *
+ * \param dst  binding to overwrite
+ * \param src  binding to copy from, or NULL to clear \p dst to all zeros
+ */
+void
+util_set_index_buffer(struct pipe_index_buffer *dst,
+                      const struct pipe_index_buffer *src)
+{
+   if (!src) {
+      /* unbind: release whatever dst was holding, then zero it */
+      pipe_resource_reference(&dst->buffer, NULL);
+      memset(dst, 0, sizeof(*dst));
+      return;
+   }
+
+   /* Move the reference first; the memcpy below then stores the same
+    * buffer pointer again along with the remaining fields.
+    */
+   pipe_resource_reference(&dst->buffer, src->buffer);
+   memcpy(dst, src, sizeof(*dst));
+}
--- a/src/gallium/auxiliary/util/u_helpers.h
+++ b/src/gallium/auxiliary/util/u_helpers.h
@@ -44,6 +44,9 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count);

+void util_set_index_buffer(struct pipe_index_buffer *dst,
+                           const struct pipe_index_buffer *src);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -389,6 +389,26 @@ unsigned ffs( unsigned u )
 #define ffs __builtin_ffs
 #endif

+#ifdef HAVE___BUILTIN_FFSLL
+#define ffsll __builtin_ffsll
+#else
+/* Fallback ffsll() built from two 32-bit ffs() calls: returns the 1-based
+ * position of the least significant set bit of val, or 0 if val is 0.
+ */
+static inline int
+ffsll(long long int val)
+{
+   const unsigned lo = (unsigned) (val & 0xffffffff);
+   const unsigned hi = (unsigned) (val >> 32);
+   int pos = ffs(lo);
+
+   if (pos == 0) {
+      /* nothing set in the low word, try the high one */
+      pos = ffs(hi);
+      if (pos != 0)
+         pos += 32;
+   }
+
+   return pos;
+}
+#endif
+
 #endif /* FFS_DEFINED */

 /**
@@ -483,6 +503,26 @@ u_bit_scan64(uint64_t *mask)
 }
 #endif

+/* For looping over a bitmask when you want to loop over consecutive bits
+ * manually, for example:
+ *
+ * while (mask) {
+ *    int start, count, i;
+ *
+ *    u_bit_scan_consecutive_range(&mask, &start, &count);
+ *
+ *    for (i = 0; i < count; i++)
+ *       ... process element (start+i)
+ * }
+ */
+/*
+ * Extract the lowest run of consecutive set bits from *mask.
+ *
+ * On return *start is the bit index where the run begins, *count is its
+ * length, and the run has been cleared from *mask.  An empty mask yields
+ * *start = 0, *count = 0 and leaves *mask untouched.
+ */
+static inline void
+u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
+{
+   if (*mask == 0) {
+      /* ffs(0) - 1 would be -1 and the shift below undefined */
+      *start = 0;
+      *count = 0;
+      return;
+   }
+
+   if (*mask == 0xffffffff) {
+      /* ~(*mask >> 0) is 0 here, so ffs() cannot report the run length */
+      *start = 0;
+      *count = 32;
+      *mask = 0;
+      return;
+   }
+
+   *start = ffs(*mask) - 1;
+   *count = ffs(~(*mask >> *start)) - 1;
+   /* 1u: a run of 31 bits must not overflow a signed int */
+   *mask &= ~(((1u << *count) - 1) << *start);
+}
+
 /**
 * Return float bits.
 */
--- a/src/gallium/auxiliary/util/u_prim_restart.c
+++ b/src/gallium/auxiliary/util/u_prim_restart.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#include "u_inlines.h"
+#include "u_memory.h"
+#include "u_prim_restart.h"
+
+
+/**
+ * Translate an index buffer for primitive restart.
+ * Create a new index buffer which is a copy of the original index buffer
+ * except that instances of 'restart_index' are converted to 0xffff or
+ * 0xffffffff.
+ * Also, index buffers using 1-byte indexes are converted to 2-byte indexes.
+ *
+ * \param context        context used to create and map the buffers
+ * \param src_buffer     the original index buffer (must not be a user buffer)
+ * \param dst_buffer     returns the new index buffer; set to NULL on failure
+ * \param num_indexes    number of indexes to translate
+ * \param restart_index  the index value to replace
+ * \return PIPE_OK on success, PIPE_ERROR_OUT_OF_MEMORY if a buffer could
+ *         not be created or mapped
+ */
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index)
+{
+   struct pipe_screen *screen = context->screen;
+   struct pipe_transfer *src_transfer = NULL, *dst_transfer = NULL;
+   void *src_map = NULL, *dst_map = NULL;
+   const unsigned src_index_size = src_buffer->index_size;
+   unsigned dst_index_size;
+
+   /* 1-byte indexes are converted to 2-byte indexes, 4-byte stays 4-byte */
+   dst_index_size = MAX2(2, src_buffer->index_size);
+   assert(dst_index_size == 2 || dst_index_size == 4);
+
+   /* no user buffers for now */
+   assert(src_buffer->user_buffer == NULL);
+
+   /* Create new index buffer */
+   *dst_buffer = pipe_buffer_create(screen, PIPE_BIND_INDEX_BUFFER,
+                                    PIPE_USAGE_STREAM,
+                                    num_indexes * dst_index_size);
+   if (!*dst_buffer)
+      goto error;
+
+   /* Map new / dest index buffer */
+   dst_map = pipe_buffer_map(context, *dst_buffer,
+                             PIPE_TRANSFER_WRITE, &dst_transfer);
+   if (!dst_map)
+      goto error;
+
+   /* Map original / src index buffer */
+   src_map = pipe_buffer_map_range(context, src_buffer->buffer,
+                                   src_buffer->offset,
+                                   num_indexes * src_index_size,
+                                   PIPE_TRANSFER_READ,
+                                   &src_transfer);
+   if (!src_map)
+      goto error;
+
+   if (src_index_size == 1 && dst_index_size == 2) {
+      uint8_t *src = (uint8_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else if (src_index_size == 2 && dst_index_size == 2) {
+      uint16_t *src = (uint16_t *) src_map;
+      uint16_t *dst = (uint16_t *) dst_map;
+      unsigned i;
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffff : src[i];
+      }
+   }
+   else {
+      uint32_t *src = (uint32_t *) src_map;
+      uint32_t *dst = (uint32_t *) dst_map;
+      unsigned i;
+      assert(src_index_size == 4);
+      assert(dst_index_size == 4);
+      for (i = 0; i < num_indexes; i++) {
+         dst[i] = (src[i] == restart_index) ? 0xffffffff : src[i];
+      }
+   }
+
+   pipe_buffer_unmap(context, src_transfer);
+   pipe_buffer_unmap(context, dst_transfer);
+
+   return PIPE_OK;
+
+error:
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+   if (dst_transfer)
+      pipe_buffer_unmap(context, dst_transfer);
+   /* Drop our reference and clear the caller's pointer so that it is not
+    * left pointing at a destroyed resource.
+    */
+   if (*dst_buffer)
+      pipe_resource_reference(dst_buffer, NULL);
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/** Helper structs for util_draw_vbo_without_prim_restart() */
+
+/* One piece of the draw: first index and number of indexes. */
+struct range {
+   unsigned start, count;
+};
+
+/* Growable array of ranges, filled in by add_range(). */
+struct range_info {
+   struct range *ranges;   /* the array itself, NULL until first use */
+   unsigned count, max;    /* entries in use / entries allocated */
+};
+
+
+/**
+ * Helper function for util_draw_vbo_without_prim_restart()
+ * Append the range (start, count) to info->ranges[], allocating or
+ * doubling the array as needed.
+ *
+ * On failure the contents of \p info are left as they were, so the caller
+ * still owns (and must free) any previously allocated info->ranges.
+ * NOTE(review): this assumes REALLOC() leaves the old block intact when it
+ * fails, as realloc() does.
+ *
+ * \return true for success, false if out of memory
+ */
+static boolean
+add_range(struct range_info *info, unsigned start, unsigned count)
+{
+   if (info->max == 0) {
+      const unsigned initial_max = 10;
+      struct range *ranges = MALLOC(initial_max * sizeof(struct range));
+      if (!ranges) {
+         return FALSE;
+      }
+
+      /* only record the capacity once the array really exists */
+      info->ranges = ranges;
+      info->max = initial_max;
+   }
+   else if (info->count == info->max) {
+      /* grow the ranges[] array; keep the old pointer until we know
+       * the reallocation worked so it is not leaked on failure
+       */
+      struct range *grown = REALLOC(info->ranges,
+                                    info->max * sizeof(struct range),
+                                    2 * info->max * sizeof(struct range));
+      if (!grown) {
+         return FALSE;
+      }
+
+      info->ranges = grown;
+      info->max *= 2;
+   }
+
+   /* save the range */
+   info->ranges[info->count].start = start;
+   info->ranges[info->count].count = count;
+   info->count++;
+
+   return TRUE;
+}
+
+
+/**
+ * Implement primitive restart by breaking an indexed primitive into
+ * pieces which do not contain restart indexes.  Each piece is then
+ * drawn by calling pipe_context::draw_vbo().
+ *
+ * \param context  context used to map the index buffer and issue the draws
+ * \param ib       index buffer; ib->buffer is mapped if set, otherwise
+ *                 ib->user_buffer is read directly
+ * \param info     the indexed draw, with primitive_restart set
+ * \return PIPE_OK if no error, an error code otherwise.
+ */
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info)
+{
+   const void *src_map;
+   struct range_info ranges = {0};
+   struct pipe_draw_info new_info;
+   struct pipe_transfer *src_transfer = NULL;
+   enum pipe_error ret = PIPE_OK;
+   unsigned i, start, count;
+
+   assert(info->indexed);
+   assert(info->primitive_restart);
+
+   /* Get pointer to the index data */
+   if (ib->buffer) {
+      /* map the index buffer (only the range we need to scan) */
+      src_map = pipe_buffer_map_range(context, ib->buffer,
+                                      ib->offset + info->start * ib->index_size,
+                                      info->count * ib->index_size,
+                                      PIPE_TRANSFER_READ,
+                                      &src_transfer);
+      if (!src_map) {
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+   }
+   else {
+      if (!ib->user_buffer) {
+         debug_printf("User-space index buffer is null!");
+         return PIPE_ERROR_BAD_INPUT;
+      }
+      src_map = (const uint8_t *) ib->user_buffer
+         + ib->offset
+         + info->start * ib->index_size;
+   }
+
+   /* Walk the indexes, closing the current range at every restart index
+    * and at the end of the draw.  'i' and 'start' are relative to
+    * info->start because src_map already points at that index; a range
+    * is made absolute only when it is stored.  On out-of-memory the scan
+    * stops with 'ret' set; cleanup happens below.
+    */
+#define SCAN_INDEXES(TYPE) \
+   for (i = 0; i <= info->count; i++) { \
+      if (i == info->count || \
+          ((const TYPE *) src_map)[i] == info->restart_index) { \
+         /* cut / restart */ \
+         if (count > 0) { \
+            if (!add_range(&ranges, info->start + start, count)) { \
+               ret = PIPE_ERROR_OUT_OF_MEMORY; \
+               break; \
+            } \
+         } \
+         start = i + 1; \
+         count = 0; \
+      } \
+      else { \
+         count++; \
+      } \
+   }
+
+   /* relative to info->start, see above */
+   start = 0;
+   count = 0;
+   switch (ib->index_size) {
+   case 1:
+      SCAN_INDEXES(uint8_t);
+      break;
+   case 2:
+      SCAN_INDEXES(uint16_t);
+      break;
+   case 4:
+      SCAN_INDEXES(uint32_t);
+      break;
+   default:
+      assert(!"Bad index size");
+      ret = PIPE_ERROR_BAD_INPUT;
+      break;
+   }
+
+#undef SCAN_INDEXES
+
+   /* unmap index buffer (on the error paths too) */
+   if (src_transfer)
+      pipe_buffer_unmap(context, src_transfer);
+
+   if (ret == PIPE_OK) {
+      /* draw ranges between the restart indexes */
+      new_info = *info;
+      new_info.primitive_restart = FALSE;
+      for (i = 0; i < ranges.count; i++) {
+         new_info.start = ranges.ranges[i].start;
+         new_info.count = ranges.ranges[i].count;
+         context->draw_vbo(context, &new_info);
+      }
+   }
+
+   /* always release the range list, including after a failed scan */
+   FREE(ranges.ranges);
+
+   return ret;
+}
--- a/src/gallium/auxiliary/util/u_prim_restart.h
+++ b/src/gallium/auxiliary/util/u_prim_restart.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef U_PRIM_RESTART_H
+#define U_PRIM_RESTART_H
+
+
+#include "pipe/p_defines.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+struct pipe_context;
+struct pipe_draw_info;
+struct pipe_index_buffer;
+struct pipe_resource;
+
+
+enum pipe_error
+util_translate_prim_restart_ib(struct pipe_context *context,
+                               struct pipe_index_buffer *src_buffer,
+                               struct pipe_resource **dst_buffer,
+                               unsigned num_indexes,
+                               unsigned restart_index);
+
+enum pipe_error
+util_draw_vbo_without_prim_restart(struct pipe_context *context,
+                                   const struct pipe_index_buffer *ib,
+                                   const struct pipe_draw_info *info);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/gallium/auxiliary/util/u_pstipple.c
+++ b/src/gallium/auxiliary/util/u_pstipple.c
@@ -339,7 +339,7 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)
   /* KILL_IF -texTemp;   # if -texTemp < 0, kill fragment */
   tgsi_transform_kill_inst(ctx,
                            TGSI_FILE_TEMPORARY, texTemp,
-                            TGSI_SWIZZLE_W);
+                            TGSI_SWIZZLE_W, TRUE);
 }


--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -42,6 +42,7 @@ struct u_rect {
 };

 /* Do two rectangles intersect?
+ * Note: empty rectangles are valid as inputs (and never intersect).
 */
 static inline boolean
 u_rect_test_intersection(const struct u_rect *a,
@@ -50,7 +51,11 @@ u_rect_test_intersection(const struct u_rect *a,
   return (!(a->x1 < b->x0 ||
             b->x1 < a->x0 ||
             a->y1 < b->y0 ||
-             b->y1 < a->y0));
+             b->y1 < a->y0 ||
+             a->x1 < a->x0 ||
+             a->y1 < a->y0 ||
+             b->x1 < b->x0 ||
+             b->y1 < b->y0));
 }

 /* Find the intersection of two rectangles known to intersect.
@@ -82,7 +87,12 @@ u_rect_possible_intersection(const struct u_rect *a,
      u_rect_find_intersection(a,b);
   }
   else {
-      b->x0 = b->x1 = b->y0 = b->y1 = 0;
+      /*
+       * Note the u_rect_xx tests deal with inclusive coordinates
+       * hence all-zero would not be an empty box.
+       */
+      b->x0 = b->y0 = 0;
+      b->x1 = b->y1 = -1;
   }
 }

--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -831,3 +831,54 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,

   return ureg_create_shader_and_destroy(ureg, pipe);
 }
+
+/**
+ * Build a geometry shader (points in, points out, one output vertex, one
+ * invocation) that MOVs every declared input attribute to the output with
+ * the same semantic and then emits the vertex.
+ *
+ * \param pipe              context the shader is created for
+ * \param num_attribs       number of attributes to pass through
+ * \param semantic_names    semantic name of each attribute
+ * \param semantic_indexes  semantic index of each attribute
+ * \return the new shader, or NULL if the ureg program could not be created
+ */
+void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes)
+{
+   static const unsigned zero[4] = {0, 0, 0, 0};
+
+   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
+   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
+   struct ureg_src zero_imm;
+   struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY);
+   unsigned attr;
+
+   if (!ureg)
+      return NULL;
+
+   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS);
+   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1);
+   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, 1);
+   zero_imm = ureg_DECL_immediate_uint(ureg, zero, 4);
+
+   /* Declare one input (read through dimension 0) and one output
+    * per attribute.
+    */
+   for (attr = 0; attr < num_attribs; attr++) {
+      const ubyte name = semantic_names[attr];
+      const ubyte index = semantic_indexes[attr];
+
+      inputs[attr] = ureg_src_dimension(ureg_DECL_input(ureg, name, index,
+                                                        0, 1), 0);
+      outputs[attr] = ureg_DECL_output(ureg, name, index);
+   }
+
+   /* MOV out[attr], in[attr] */
+   for (attr = 0; attr < num_attribs; attr++)
+      ureg_MOV(ureg, outputs[attr], inputs[attr]);
+
+   /* EMIT IMM[0] */
+   ureg_insn(ureg, TGSI_OPCODE_EMIT, NULL, 0, &zero_imm, 1);
+
+   /* END */
+   ureg_END(ureg);
+
+   return ureg_create_shader_and_destroy(ureg, pipe);
+}
+
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -146,6 +146,12 @@ util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe,
                                   unsigned tgsi_tex, unsigned nr_samples,
                                   enum tgsi_return_type stype);

+extern void *
+util_make_geometry_passthrough_shader(struct pipe_context *pipe,
+                                      uint num_attribs,
+                                      const ubyte *semantic_names,
+                                      const ubyte *semantic_indexes);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/util/u_string.h
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -199,6 +199,8 @@ util_memmove(void *dest, const void *src, size_t n)
 }


+#define util_strcasecmp stricmp
+
 #else

 #define util_vsnprintf vsnprintf
@@ -211,6 +213,7 @@ util_memmove(void *dest, const void *src, size_t n)
 #define util_strncat strncat
 #define util_strstr strstr
 #define util_memmove memmove
+#define util_strcasecmp strcasecmp

 #endif

--- a/src/gallium/auxiliary/util/u_tests.c
+++ b/src/gallium/auxiliary/util/u_tests.c
@@ -457,7 +457,7 @@ null_constant_buffer(struct pipe_context *ctx)
 void
 util_run_tests(struct pipe_screen *screen)
 {
-   struct pipe_context *ctx = screen->context_create(screen, NULL);
+   struct pipe_context *ctx = screen->context_create(screen, NULL, 0);

   tgsi_vs_window_space_position(ctx);
   null_sampler_view(ctx, TGSI_TEXTURE_2D);
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -129,9 +129,9 @@ void u_upload_destroy( struct u_upload_mgr *upload )
 }


-static enum pipe_error 
-u_upload_alloc_buffer( struct u_upload_mgr *upload,
-                       unsigned min_size )
+static void
+u_upload_alloc_buffer(struct u_upload_mgr *upload,
+                      unsigned min_size)
 {
   struct pipe_screen *screen = upload->pipe->screen;
   struct pipe_resource buffer;
@@ -161,9 +161,8 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
   }

   upload->buffer = screen->resource_create(screen, &buffer);
-   if (upload->buffer == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
-   }
+   if (upload->buffer == NULL)
+      return;

   /* Map the new buffer. */
   upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
@@ -172,52 +171,54 @@ u_upload_alloc_buffer( struct u_upload_mgr *upload,
   if (upload->map == NULL) {
      upload->transfer = NULL;
      pipe_resource_reference(&upload->buffer, NULL);
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      return;
   }

   upload->offset = 0;
-   return PIPE_OK;
 }

-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr )
+void
+u_upload_alloc(struct u_upload_mgr *upload,
+               unsigned min_out_offset,
+               unsigned size,
+               unsigned *out_offset,
+               struct pipe_resource **outbuf,
+               void **ptr)
 {
-   unsigned alloc_size = align( size, upload->alignment );
+   unsigned alloc_size = align(size, upload->alignment);
   unsigned alloc_offset = align(min_out_offset, upload->alignment);
+   unsigned buffer_size = upload->buffer ? upload->buffer->width0 : 0;
   unsigned offset;

-   /* Init these return values here in case we fail below to make
-    * sure the caller doesn't get garbage values.
-    */
-   *out_offset = ~0;
-   pipe_resource_reference(outbuf, NULL);
-   *ptr = NULL;
-
   /* Make sure we have enough space in the upload buffer
    * for the sub-allocation. */
-   if (!upload->buffer ||
-       MAX2(upload->offset, alloc_offset) + alloc_size > upload->buffer->width0) {
-      enum pipe_error ret = u_upload_alloc_buffer(upload,
-                                                  alloc_offset + alloc_size);
-      if (ret != PIPE_OK)
-         return ret;
+   if (unlikely(MAX2(upload->offset, alloc_offset) + alloc_size > buffer_size)) {
+      u_upload_alloc_buffer(upload, alloc_offset + alloc_size);
+
+      if (unlikely(!upload->buffer)) {
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
+         return;
+      }
+
+      buffer_size = upload->buffer->width0;
   }

   offset = MAX2(upload->offset, alloc_offset);

-   if (!upload->map) {
+   if (unlikely(!upload->map)) {
      upload->map = pipe_buffer_map_range(upload->pipe, upload->buffer,
                                          offset,
-                                          upload->buffer->width0 - offset,
+                                          buffer_size - offset,
                                          upload->map_flags,
 					  &upload->transfer);
-      if (!upload->map) {
+      if (unlikely(!upload->map)) {
         upload->transfer = NULL;
-         return PIPE_ERROR_OUT_OF_MEMORY;
+         *out_offset = ~0;
+         pipe_resource_reference(outbuf, NULL);
+         *ptr = NULL;
+         return;
      }

      upload->map -= offset;
@@ -229,46 +230,37 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,

   /* Emit the return values: */
   *ptr = upload->map + offset;
-   pipe_resource_reference( outbuf, upload->buffer );
+   pipe_resource_reference(outbuf, upload->buffer);
   *out_offset = offset;

   upload->offset = offset + alloc_size;
-   return PIPE_OK;
 }

-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf)
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf)
 {
   uint8_t *ptr;
-   enum pipe_error ret = u_upload_alloc(upload, min_out_offset, size,
-                                        out_offset, outbuf,
-                                        (void**)&ptr);
-   if (ret != PIPE_OK)
-      return ret;

-   memcpy(ptr, data, size);
-   return PIPE_OK;
+   u_upload_alloc(upload, min_out_offset, size,
+                  out_offset, outbuf,
+                  (void**)&ptr);
+   if (ptr)
+      memcpy(ptr, data, size);
 }

-
-/* As above, but upload the full contents of a buffer.  Useful for
- * uploading user buffers, avoids generating an explosion of GPU
- * buffers if you have an app that does lots of small vertex buffer
- * renders or DrawElements calls.
- */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf)
+/* XXX: Remove. It's basically a CPU fallback of resource_copy_region. */
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf)
 {
-   enum pipe_error ret = PIPE_OK;
   struct pipe_transfer *transfer = NULL;
   const char *map = NULL;

@@ -279,20 +271,13 @@ enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
                                             &transfer);

   if (map == NULL) {
-      return PIPE_ERROR_OUT_OF_MEMORY;
+      pipe_resource_reference(outbuf, NULL);
+      return;
   }

   if (0)
      debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size);

-   ret = u_upload_data( upload,
-                        min_out_offset,
-                        size,
-                        map,
-                        out_offset,
-                        outbuf);
-
+   u_upload_data(upload, min_out_offset, size, map, out_offset, outbuf);
   pipe_buffer_unmap( upload->pipe, transfer );
-
-   return ret;
 }
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -78,12 +78,12 @@ void u_upload_unmap( struct u_upload_mgr *upload );
 * \param outbuf           Pointer to where the upload buffer will be returned.
 * \param ptr              Pointer to the allocated memory that is returned.
 */
-enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
-                                unsigned min_out_offset,
-                                unsigned size,
-                                unsigned *out_offset,
-                                struct pipe_resource **outbuf,
-                                void **ptr );
+void u_upload_alloc(struct u_upload_mgr *upload,
+                    unsigned min_out_offset,
+                    unsigned size,
+                    unsigned *out_offset,
+                    struct pipe_resource **outbuf,
+                    void **ptr);


 /**
@@ -92,12 +92,12 @@ enum pipe_error u_upload_alloc( struct u_upload_mgr *upload,
 * Same as u_upload_alloc, but in addition to that, it copies "data"
 * to the pointer returned from u_upload_alloc.
 */
-enum pipe_error u_upload_data( struct u_upload_mgr *upload,
-                               unsigned min_out_offset,
-                               unsigned size,
-                               const void *data,
-                               unsigned *out_offset,
-                               struct pipe_resource **outbuf);
+void u_upload_data(struct u_upload_mgr *upload,
+                   unsigned min_out_offset,
+                   unsigned size,
+                   const void *data,
+                   unsigned *out_offset,
+                   struct pipe_resource **outbuf);


 /**
@@ -106,13 +106,13 @@ enum pipe_error u_upload_data( struct u_upload_mgr *upload,
 * Same as u_upload_data, except that the input data comes from a buffer
 * instead of a user pointer.
 */
-enum pipe_error u_upload_buffer( struct u_upload_mgr *upload,
-                                 unsigned min_out_offset,
-                                 unsigned offset,
-                                 unsigned size,
-                                 struct pipe_resource *inbuf,
-                                 unsigned *out_offset,
-                                 struct pipe_resource **outbuf);
+void u_upload_buffer(struct u_upload_mgr *upload,
+                     unsigned min_out_offset,
+                     unsigned offset,
+                     unsigned size,
+                     struct pipe_resource *inbuf,
+                     unsigned *out_offset,
+                     struct pipe_resource **outbuf);



--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -406,7 +406,6 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;
-   enum pipe_error err;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);
@@ -454,12 +453,12 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
      assert((ib->buffer || ib->user_buffer) && ib->index_size);

      /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader, 0,
-                           key->output_stride * num_indices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader, 0,
+                     key->output_stride * num_indices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;

      if (ib->user_buffer) {
         map = (uint8_t*)ib->user_buffer + offset;
@@ -486,13 +485,13 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
      }
   } else {
      /* Create and map the output buffer. */
-      err = u_upload_alloc(mgr->uploader,
-                           key->output_stride * start_vertex,
-                           key->output_stride * num_vertices,
-                           &out_offset, &out_buffer,
-                           (void**)&out_map);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_alloc(mgr->uploader,
+                     key->output_stride * start_vertex,
+                     key->output_stride * num_vertices,
+                     &out_offset, &out_buffer,
+                     (void**)&out_map);
+      if (!out_buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

@@ -977,7 +976,6 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;
-      enum pipe_error err;

      i = u_bit_scan(&buffer_mask);

@@ -988,10 +986,10 @@ u_vbuf_upload_buffers(struct u_vbuf *mgr,
      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].user_buffer;

-      err = u_upload_data(mgr->uploader, start, end - start, ptr + start,
-                          &real_vb->buffer_offset, &real_vb->buffer);
-      if (err != PIPE_OK)
-         return err;
+      u_upload_data(mgr->uploader, start, end - start, ptr + start,
+                    &real_vb->buffer_offset, &real_vb->buffer);
+      if (!real_vb->buffer)
+         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -1120,7 +1120,7 @@ vl_create_mpeg12_decoder(struct pipe_context *context,

   dec->base = *templat;
   dec->base.context = context;
-   dec->context = context->screen->context_create(context->screen, NULL);
+   dec->context = context->screen->context_create(context->screen, NULL, 0);

   dec->base.destroy = vl_mpeg12_destroy;
   dec->base.begin_frame = vl_mpeg12_begin_frame;
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -267,6 +267,16 @@ The integer capabilities:
 * ``PIPE_CAP_DEPTH_BOUNDS_TEST``: Whether bounds_test, bounds_min, and
  bounds_max states of pipe_depth_stencil_alpha_state behave according
  to the GL_EXT_depth_bounds_test specification.
+* ``PIPE_CAP_TGSI_TXQS``: Whether the `TXQS` opcode is supported
+* ``PIPE_CAP_FORCE_PERSAMPLE_INTERP``: Whether the driver can force per-sample
+  interpolation for all fragment shader inputs when
+  pipe_rasterizer_state::force_persample_interp is set. This is only used
+  by GL3-level sample shading (ARB_sample_shading). GL4-level sample shading
+  (ARB_gpu_shader5) doesn't use this. While GL3 hardware has a state for it,
+  GL4 hardware will likely need to emulate it with a shader variant, or by
+  selecting the interpolation weights with a conditional assignment
+  in the shader.
+


 .. _pipe_capf:
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -960,7 +960,6 @@ XXX doesn't look like most of the opcodes really belong here.
  For components which don't return a resource dimension, their value
  is undefined.

-
 .. math::

  lod = src0.x
@@ -973,6 +972,17 @@ XXX doesn't look like most of the opcodes really belong here.

  dst.w = texture\_levels(unit)

+
+.. opcode:: TXQS - Texture Samples Query
+
+  This retrieves the number of samples in the texture, and stores it
+  into the x component. The other components are undefined.
+
+.. math::
+
+  dst.x = texture\_samples(unit)
+
+
 .. opcode:: TG4 - Texture Gather

  As per ARB_texture_gather, gathers the four texels to be used in a bi-linear
--- a/src/gallium/drivers/ddebug/Makefile.am
+++ b/src/gallium/drivers/ddebug/Makefile.am
@@ -0,0 +1,9 @@
+include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	$(GALLIUM_DRIVER_CFLAGS)
+
+noinst_LTLIBRARIES = libddebug.la
+
+libddebug_la_SOURCES = $(C_SOURCES)
--- a/src/gallium/drivers/ddebug/Makefile.sources
+++ b/src/gallium/drivers/ddebug/Makefile.sources
@@ -0,0 +1,7 @@
+C_SOURCES := \
+	dd_context.c \
+	dd_draw.c \
+	dd_pipe.h \
+	dd_public.h \
+	dd_screen.c \
+	dd_util.h
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -0,0 +1,771 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dd_pipe.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+
+/**
+ * memcpy() variant that tolerates a NULL source: when \p src is NULL the
+ * destination is zero-filled instead of being copied into.
+ */
+static void
+safe_memcpy(void *dst, const void *src, size_t size)
+{
+   if (!src) {
+      memset(dst, 0, size);
+      return;
+   }
+
+   memcpy(dst, src, size);
+}
+
+
+/********************************************************************
+ * queries
+ */
+
+/* Reinterpret an opaque pipe_query handle as the dd_query wrapper behind it. */
+static struct dd_query *
+dd_query(struct pipe_query *query)
+{
+   struct dd_query *wrapper = (struct dd_query *)query;
+
+   return wrapper;
+}
+
+/* Return the driver query stored inside a wrapper; NULL maps to NULL. */
+static struct pipe_query *
+dd_query_unwrap(struct pipe_query *query)
+{
+   return query ? dd_query(query)->query : NULL;
+}
+
+/**
+ * Create a query on the wrapped context and hand back a dd_query wrapper
+ * that remembers the query type alongside the driver's query object.
+ *
+ * Returns NULL if the driver fails to create the query, or if the wrapper
+ * cannot be allocated (the driver query is destroyed again in that case).
+ */
+static struct pipe_query *
+dd_context_create_query(struct pipe_context *_pipe, unsigned query_type,
+                        unsigned index)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_query *driver_query;
+   struct dd_query *wrapper;
+
+   driver_query = pipe->create_query(pipe, query_type, index);
+   if (!driver_query)
+      return NULL;
+
+   wrapper = CALLOC_STRUCT(dd_query);
+   if (!wrapper) {
+      /* Do not leak the driver object when wrapping fails. */
+      pipe->destroy_query(pipe, driver_query);
+      return NULL;
+   }
+
+   wrapper->type = query_type;
+   wrapper->query = driver_query;
+   return (struct pipe_query *)wrapper;
+}
+
+/* Destroy the driver query held by the wrapper, then free the wrapper. */
+static void
+dd_context_destroy_query(struct pipe_context *_pipe,
+                         struct pipe_query *query)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   struct pipe_query *driver_query = dd_query_unwrap(query);
+
+   driver->destroy_query(driver, driver_query);
+   FREE(query);
+}
+
+/* Forward begin_query to the wrapped context using the unwrapped query. */
+static boolean
+dd_context_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct pipe_context *driver = dd_context(_pipe)->pipe;
+   struct pipe_query *driver_query = dd_query_unwrap(query);
+
+   return driver->begin_query(driver, driver_query);
+}
+
+/* Forward end_query to the wrapped context using the unwrapped query. */
+static void
+dd_context_end_query(struct pipe_context *_pipe, struct pipe_query *query)
+{
+   struct pipe_context *driver = dd_context(_pipe)->pipe;
+   struct pipe_query *driver_query = dd_query_unwrap(query);
+
+   driver->end_query(driver, driver_query);
+}
+
+/* Fetch a query result from the wrapped context for the unwrapped query. */
+static boolean
+dd_context_get_query_result(struct pipe_context *_pipe,
+                            struct pipe_query *query, boolean wait,
+                            union pipe_query_result *result)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   struct pipe_query *driver_query = dd_query_unwrap(query);
+
+   return driver->get_query_result(driver, driver_query, wait, result);
+}
+
+/**
+ * Set the render condition on the wrapped context and remember the
+ * (wrapped) query, condition and mode in dctx->render_cond.
+ */
+static void
+dd_context_render_condition(struct pipe_context *_pipe,
+                            struct pipe_query *query, boolean condition,
+                            uint mode)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   struct pipe_query *driver_query = dd_query_unwrap(query);
+
+   driver->render_condition(driver, driver_query, condition, mode);
+
+   /* Keep the wrapper, not the driver query, in the saved state. */
+   dctx->render_cond.query = dd_query(query);
+   dctx->render_cond.condition = condition;
+   dctx->render_cond.mode = mode;
+}
+
+
+/********************************************************************
+ * constant (immutable) non-shader states
+ */
+
+/*
+ * Defines dd_context_create_<name>_state(): allocates a dd_state wrapper,
+ * stores the driver's CSO in ->cso and a by-value copy of the template in
+ * ->state.<shortname>. Returns NULL only if the wrapper allocation fails.
+ * NOTE(review): the driver's return value is not checked, so a NULL CSO
+ * still yields a non-NULL wrapper.
+ */
+#define DD_CSO_CREATE(name, shortname) \
+   static void * \
+   dd_context_create_##name##_state(struct pipe_context *_pipe, \
+                                    const struct pipe_##name##_state *state) \
+   { \
+      struct pipe_context *pipe = dd_context(_pipe)->pipe; \
+      struct dd_state *hstate = CALLOC_STRUCT(dd_state); \
+ \
+      if (!hstate) \
+         return NULL; \
+      hstate->cso = pipe->create_##name##_state(pipe, state); \
+      hstate->state.shortname = *state; \
+      return hstate; \
+   }
+
+/*
+ * Defines dd_context_bind_<name>_state(): records the wrapper in
+ * dctx-><shortname> and binds the wrapped CSO on the driver context
+ * (NULL is passed through when state is NULL).
+ */
+#define DD_CSO_BIND(name, shortname) \
+   static void \
+   dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+ \
+      dctx->shortname = hstate; \
+      pipe->bind_##name##_state(pipe, hstate ? hstate->cso : NULL); \
+   }
+
+/*
+ * Defines dd_context_delete_<name>_state(): deletes the wrapped CSO on the
+ * driver context and frees the wrapper. The wrapper is dereferenced
+ * unconditionally, so state must not be NULL.
+ */
+#define DD_CSO_DELETE(name) \
+   static void \
+   dd_context_delete_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+ \
+      pipe->delete_##name##_state(pipe, hstate->cso); \
+      FREE(hstate); \
+   }
+
+/* Convenience: generate create, bind and delete for one CSO type. */
+#define DD_CSO_WHOLE(name, shortname) \
+   DD_CSO_CREATE(name, shortname) \
+   DD_CSO_BIND(name, shortname) \
+   DD_CSO_DELETE(name)
+
+DD_CSO_WHOLE(blend, blend)
+DD_CSO_WHOLE(rasterizer, rs)
+DD_CSO_WHOLE(depth_stencil_alpha, dsa)
+
+/* Samplers only get generated create/delete functions here; no bind
+ * function is generated for them by DD_CSO_BIND. */
+DD_CSO_CREATE(sampler, sampler)
+DD_CSO_DELETE(sampler)
+
+/**
+ * Bind sampler state wrappers for one shader stage.
+ *
+ * The dd_state wrappers are remembered in dctx->sampler_states and the
+ * driver CSOs they wrap are passed on to the wrapped context.
+ *
+ * \param shader  shader index; selects the row of dctx->sampler_states and
+ *                is forwarded to the driver unchanged
+ * \param start   first sampler slot to update
+ * \param count   number of consecutive slots to update
+ * \param states  array of \p count dd_state pointers (entries may be NULL),
+ *                or NULL, which is forwarded to the driver as NULL
+ */
+static void
+dd_context_bind_sampler_states(struct pipe_context *_pipe, unsigned shader,
+                               unsigned start, unsigned count, void **states)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *pipe = dctx->pipe;
+
+   /* states may be NULL (see the else branch below) and memcpy() from a
+    * NULL source is undefined; safe_memcpy() zero-fills the slots instead. */
+   safe_memcpy(&dctx->sampler_states[shader][start], states,
+               sizeof(void*) * count);
+
+   if (states) {
+      void *samp[PIPE_MAX_SAMPLERS];
+      unsigned i;
+
+      /* The driver must be given its own CSOs, not our wrappers. */
+      for (i = 0; i < count; i++) {
+         struct dd_state *s = states[i];
+         samp[i] = s ? s->cso : NULL;
+      }
+
+      pipe->bind_sampler_states(pipe, shader, start, count, samp);
+   }
+   else
+      pipe->bind_sampler_states(pipe, shader, start, count, NULL);
+}
+
+/**
+ * Create a vertex-elements CSO on the wrapped context and return a dd_state
+ * wrapper that also keeps a copy of the element array and its count.
+ * Returns NULL if the wrapper cannot be allocated.
+ */
+static void *
+dd_context_create_vertex_elements_state(struct pipe_context *_pipe,
+                                        unsigned num_elems,
+                                        const struct pipe_vertex_element *elems)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   struct dd_state *wrapper = CALLOC_STRUCT(dd_state);
+
+   if (wrapper) {
+      wrapper->cso = driver->create_vertex_elements_state(driver, num_elems,
+                                                          elems);
+      memcpy(wrapper->state.velems.velems, elems, num_elems * sizeof(*elems));
+      wrapper->state.velems.count = num_elems;
+   }
+
+   return wrapper;
+}
+
+DD_CSO_BIND(vertex_elements, velems)
+DD_CSO_DELETE(vertex_elements)
+
+
+/********************************************************************
+ * shaders
+ */
+
+/*
+ * Defines create/bind/delete wrappers for one shader stage.
+ *
+ * NAME is the PIPE_SHADER_<NAME> suffix used to index dctx->shaders; name is
+ * the infix of the pipe_context hooks (create_<name>_state etc.).
+ *
+ * create: wraps the driver CSO in a dd_state and stores a copy of the
+ *         pipe_shader_state whose tokens are duplicated with
+ *         tgsi_dup_tokens(), so the wrapper holds its own token copy.
+ * bind:   records the wrapper in dctx->shaders[] and binds the wrapped CSO
+ *         (NULL is passed through when state is NULL).
+ * delete: deletes the driver CSO, frees the duplicated tokens and then the
+ *         wrapper; state is dereferenced unconditionally.
+ */
+#define DD_SHADER(NAME, name) \
+   static void * \
+   dd_context_create_##name##_state(struct pipe_context *_pipe, \
+                                    const struct pipe_shader_state *state) \
+   { \
+      struct pipe_context *pipe = dd_context(_pipe)->pipe; \
+      struct dd_state *hstate = CALLOC_STRUCT(dd_state); \
+ \
+      if (!hstate) \
+         return NULL; \
+      hstate->cso = pipe->create_##name##_state(pipe, state); \
+      hstate->state.shader = *state; \
+      hstate->state.shader.tokens = tgsi_dup_tokens(state->tokens); \
+      return hstate; \
+   } \
+    \
+   static void \
+   dd_context_bind_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+   \
+      dctx->shaders[PIPE_SHADER_##NAME] = hstate; \
+      pipe->bind_##name##_state(pipe, hstate ? hstate->cso : NULL); \
+   } \
+    \
+   static void \
+   dd_context_delete_##name##_state(struct pipe_context *_pipe, void *state) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+      struct dd_state *hstate = state; \
+   \
+      pipe->delete_##name##_state(pipe, hstate->cso); \
+      tgsi_free_tokens(hstate->state.shader.tokens); \
+      FREE(hstate); \
+   }
+
+DD_SHADER(FRAGMENT, fs)
+DD_SHADER(VERTEX, vs)
+DD_SHADER(GEOMETRY, gs)
+DD_SHADER(TESS_CTRL, tcs)
+DD_SHADER(TESS_EVAL, tes)
+
+
+/********************************************************************
+ * immediate states
+ */
+
+/*
+ * Defines dd_context_set_<name>(): saves the new value in dctx-><name> and
+ * forwards it to the wrapped context's set_<name>() hook.
+ *
+ * The parameter is declared as "type deref". For states passed by pointer,
+ * deref is "*state" (so the parameter is "type *state" and the assignment
+ * copies the pointed-to value) and ref is "state"; for scalar states both
+ * deref and ref are simply the parameter name.
+ */
+#define DD_IMM_STATE(name, type, deref, ref) \
+   static void \
+   dd_context_set_##name(struct pipe_context *_pipe, type deref) \
+   { \
+      struct dd_context *dctx = dd_context(_pipe); \
+      struct pipe_context *pipe = dctx->pipe; \
+ \
+      dctx->name = deref; \
+      pipe->set_##name(pipe, ref); \
+   }
+
+DD_IMM_STATE(blend_color, const struct pipe_blend_color, *state, state)
+DD_IMM_STATE(stencil_ref, const struct pipe_stencil_ref, *state, state)
+DD_IMM_STATE(clip_state, const struct pipe_clip_state, *state, state)
+DD_IMM_STATE(sample_mask, unsigned, sample_mask, sample_mask)
+DD_IMM_STATE(min_samples, unsigned, min_samples, min_samples)
+DD_IMM_STATE(framebuffer_state, const struct pipe_framebuffer_state, *state, state)
+DD_IMM_STATE(polygon_stipple, const struct pipe_poly_stipple, *state, state)
+
+/**
+ * Save the constant buffer description for (shader, index), zeroing the
+ * saved slot when constant_buffer is NULL, then forward the call.
+ */
+static void
+dd_context_set_constant_buffer(struct pipe_context *_pipe,
+                               uint shader, uint index,
+                               struct pipe_constant_buffer *constant_buffer)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->constant_buffers[shader][index];
+
+   safe_memcpy(saved, constant_buffer, sizeof(*constant_buffer));
+   driver->set_constant_buffer(driver, shader, index, constant_buffer);
+}
+
+/**
+ * Save num_scissors scissor states starting at start_slot (zero-filled when
+ * states is NULL), then forward the call to the wrapped context.
+ */
+static void
+dd_context_set_scissor_states(struct pipe_context *_pipe,
+                              unsigned start_slot, unsigned num_scissors,
+                              const struct pipe_scissor_state *states)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->scissors[start_slot];
+
+   safe_memcpy(saved, states, sizeof(*states) * num_scissors);
+   driver->set_scissor_states(driver, start_slot, num_scissors, states);
+}
+
+/**
+ * Save num_viewports viewport states starting at start_slot (zero-filled
+ * when states is NULL), then forward the call to the wrapped context.
+ */
+static void
+dd_context_set_viewport_states(struct pipe_context *_pipe,
+                               unsigned start_slot, unsigned num_viewports,
+                               const struct pipe_viewport_state *states)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->viewports[start_slot];
+
+   safe_memcpy(saved, states, sizeof(*states) * num_viewports);
+   driver->set_viewport_states(driver, start_slot, num_viewports, states);
+}
+
+/**
+ * Save the default tessellation levels and forward them to the wrapped
+ * context. dctx->tess_default_levels holds the four outer levels first,
+ * followed by the two inner levels.
+ */
+static void dd_context_set_tess_state(struct pipe_context *_pipe,
+                                      const float default_outer_level[4],
+                                      const float default_inner_level[2])
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+
+   memcpy(&dctx->tess_default_levels[0], default_outer_level,
+          4 * sizeof(float));
+   memcpy(&dctx->tess_default_levels[4], default_inner_level,
+          2 * sizeof(float));
+   driver->set_tess_state(driver, default_outer_level, default_inner_level);
+}
+
+
+/********************************************************************
+ * views
+ */
+
+/**
+ * Create a surface on the wrapped context. On success the surface's
+ * context pointer is redirected to the wrapper context (_pipe).
+ */
+static struct pipe_surface *
+dd_context_create_surface(struct pipe_context *_pipe,
+                          struct pipe_resource *resource,
+                          const struct pipe_surface *surf_tmpl)
+{
+   struct pipe_context *driver = dd_context(_pipe)->pipe;
+   struct pipe_surface *surf;
+
+   surf = driver->create_surface(driver, resource, surf_tmpl);
+   if (surf)
+      surf->context = _pipe;
+
+   return surf;
+}
+
+/* Destroy a surface through the wrapped context. */
+static void
+dd_context_surface_destroy(struct pipe_context *_pipe,
+                           struct pipe_surface *surf)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+
+   driver->surface_destroy(driver, surf);
+}
+
+/**
+ * Create a sampler view on the wrapped context. On success the view's
+ * context pointer is redirected to the wrapper context (_pipe).
+ */
+static struct pipe_sampler_view *
+dd_context_create_sampler_view(struct pipe_context *_pipe,
+                               struct pipe_resource *resource,
+                               const struct pipe_sampler_view *templ)
+{
+   struct pipe_context *driver = dd_context(_pipe)->pipe;
+   struct pipe_sampler_view *sview;
+
+   sview = driver->create_sampler_view(driver, resource, templ);
+   if (sview)
+      sview->context = _pipe;
+
+   return sview;
+}
+
+/* Destroy a sampler view through the wrapped context. */
+static void
+dd_context_sampler_view_destroy(struct pipe_context *_pipe,
+                                struct pipe_sampler_view *view)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+
+   driver->sampler_view_destroy(driver, view);
+}
+
+/**
+ * Create an image view on the wrapped context. On success the view's
+ * context pointer is redirected to the wrapper context (_pipe).
+ */
+static struct pipe_image_view *
+dd_context_create_image_view(struct pipe_context *_pipe,
+                             struct pipe_resource *resource,
+                             const struct pipe_image_view *templ)
+{
+   struct pipe_context *driver = dd_context(_pipe)->pipe;
+   struct pipe_image_view *iview;
+
+   iview = driver->create_image_view(driver, resource, templ);
+   if (iview)
+      iview->context = _pipe;
+
+   return iview;
+}
+
+/* Destroy an image view through the wrapped context. */
+static void
+dd_context_image_view_destroy(struct pipe_context *_pipe,
+                              struct pipe_image_view *view)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+
+   driver->image_view_destroy(driver, view);
+}
+
+/**
+ * Create a stream output target on the wrapped context. On success the
+ * target's context pointer is redirected to the wrapper context (_pipe).
+ */
+static struct pipe_stream_output_target *
+dd_context_create_stream_output_target(struct pipe_context *_pipe,
+                                       struct pipe_resource *res,
+                                       unsigned buffer_offset,
+                                       unsigned buffer_size)
+{
+   struct pipe_context *driver = dd_context(_pipe)->pipe;
+   struct pipe_stream_output_target *target;
+
+   target = driver->create_stream_output_target(driver, res, buffer_offset,
+                                                buffer_size);
+   if (target)
+      target->context = _pipe;
+
+   return target;
+}
+
+/* Destroy a stream output target through the wrapped context. */
+static void
+dd_context_stream_output_target_destroy(struct pipe_context *_pipe,
+                                        struct pipe_stream_output_target *target)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+
+   driver->stream_output_target_destroy(driver, target);
+}
+
+
+/********************************************************************
+ * set states
+ */
+
+/**
+ * Save num sampler view pointers for the given shader starting at slot
+ * start (zero-filled when views is NULL), then forward the call.
+ */
+static void
+dd_context_set_sampler_views(struct pipe_context *_pipe, unsigned shader,
+                             unsigned start, unsigned num,
+                             struct pipe_sampler_view **views)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->sampler_views[shader][start];
+
+   safe_memcpy(saved, views, sizeof(views[0]) * num);
+   driver->set_sampler_views(driver, shader, start, num, views);
+}
+
+/**
+ * Save num image view pointers for the given shader starting at slot
+ * start (zero-filled when views is NULL), then forward the call.
+ */
+static void
+dd_context_set_shader_images(struct pipe_context *_pipe, unsigned shader,
+                             unsigned start, unsigned num,
+                             struct pipe_image_view **views)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->shader_images[shader][start];
+
+   safe_memcpy(saved, views, sizeof(views[0]) * num);
+   driver->set_shader_images(driver, shader, start, num, views);
+}
+
+/**
+ * Save num_buffers shader buffer descriptions for the given shader starting
+ * at slot start (zero-filled when buffers is NULL), then forward the call.
+ */
+static void
+dd_context_set_shader_buffers(struct pipe_context *_pipe, unsigned shader,
+                              unsigned start, unsigned num_buffers,
+                              struct pipe_shader_buffer *buffers)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->shader_buffers[shader][start];
+
+   safe_memcpy(saved, buffers, sizeof(buffers[0]) * num_buffers);
+   driver->set_shader_buffers(driver, shader, start, num_buffers, buffers);
+}
+
+/**
+ * Save num_buffers vertex buffer descriptions starting at slot start
+ * (zero-filled when buffers is NULL), then forward the call.
+ */
+static void
+dd_context_set_vertex_buffers(struct pipe_context *_pipe,
+                              unsigned start, unsigned num_buffers,
+                              const struct pipe_vertex_buffer *buffers)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->vertex_buffers[start];
+
+   safe_memcpy(saved, buffers, sizeof(buffers[0]) * num_buffers);
+   driver->set_vertex_buffers(driver, start, num_buffers, buffers);
+}
+
+/**
+ * Save the index buffer description (zero-filled when ib is NULL), then
+ * forward the call to the wrapped context.
+ */
+static void
+dd_context_set_index_buffer(struct pipe_context *_pipe,
+                            const struct pipe_index_buffer *ib)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+   void *saved = &dctx->index_buffer;
+
+   safe_memcpy(saved, ib, sizeof(*ib));
+   driver->set_index_buffer(driver, ib);
+}
+
+/**
+ * Save the stream output target count, target pointers and offsets (each
+ * array is zero-filled when the corresponding argument is NULL), then
+ * forward the call to the wrapped context.
+ */
+static void
+dd_context_set_stream_output_targets(struct pipe_context *_pipe,
+                                     unsigned num_targets,
+                                     struct pipe_stream_output_target **tgs,
+                                     const unsigned *offsets)
+{
+   struct dd_context *dctx = dd_context(_pipe);
+   struct pipe_context *driver = dctx->pipe;
+
+   dctx->num_so_targets = num_targets;
+   safe_memcpy(dctx->so_targets, tgs, num_targets * sizeof(tgs[0]));
+   safe_memcpy(dctx->so_offsets, offsets, num_targets * sizeof(offsets[0]));
+
+   driver->set_stream_output_targets(driver, num_targets, tgs, offsets);
+}
+
+/* Destroy the wrapped context first, then release the dd_context wrapper. */
+static void
+dd_context_destroy(struct pipe_context *_pipe)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->destroy(dd->pipe);
+   FREE(dd);
+}
+
+
+/********************************************************************
+ * transfer
+ */
+
+/* Pure pass-through: map a resource via the wrapped context and return
+ * whatever it returns. */
+static void *
+dd_context_transfer_map(struct pipe_context *_pipe,
+                        struct pipe_resource *resource, unsigned level,
+                        unsigned usage, const struct pipe_box *box,
+                        struct pipe_transfer **transfer)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   return dd->pipe->transfer_map(dd->pipe, resource, level, usage, box,
+                                 transfer);
+}
+
+/* Pure pass-through to the wrapped context's transfer_flush_region. */
+static void
+dd_context_transfer_flush_region(struct pipe_context *_pipe,
+                                 struct pipe_transfer *transfer,
+                                 const struct pipe_box *box)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->transfer_flush_region(dd->pipe, transfer, box);
+}
+
+/* Pure pass-through to the wrapped context's transfer_unmap. */
+static void
+dd_context_transfer_unmap(struct pipe_context *_pipe,
+                          struct pipe_transfer *transfer)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->transfer_unmap(dd->pipe, transfer);
+}
+
+/* Pure pass-through to the wrapped context's transfer_inline_write. */
+static void
+dd_context_transfer_inline_write(struct pipe_context *_pipe,
+                                 struct pipe_resource *resource,
+                                 unsigned level, unsigned usage,
+                                 const struct pipe_box *box,
+                                 const void *data, unsigned stride,
+                                 unsigned layer_stride)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->transfer_inline_write(dd->pipe, resource, level, usage, box,
+                                   data, stride, layer_stride);
+}
+
+
+/********************************************************************
+ * miscellaneous
+ */
+
+/* Pure pass-through to the wrapped context's texture_barrier. */
+static void
+dd_context_texture_barrier(struct pipe_context *_pipe)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->texture_barrier(dd->pipe);
+}
+
+/* Pure pass-through to the wrapped context's memory_barrier. */
+static void
+dd_context_memory_barrier(struct pipe_context *_pipe, unsigned flags)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->memory_barrier(dd->pipe, flags);
+}
+
+/* Pure pass-through to the wrapped context's get_sample_position; the
+ * result is written by the wrapped driver through out_value.
+ *
+ * Note: the hook returns void, so the call must not be used as the operand
+ * of a return statement (ISO C forbids "return expr;" in a void function).
+ */
+static void
+dd_context_get_sample_position(struct pipe_context *_pipe,
+                               unsigned sample_count, unsigned sample_index,
+                               float *out_value)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->get_sample_position(pipe, sample_count, sample_index,
+                             out_value);
+}
+
+/* Pure pass-through to the wrapped context's invalidate_resource. */
+static void
+dd_context_invalidate_resource(struct pipe_context *_pipe,
+                               struct pipe_resource *resource)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   dd->pipe->invalidate_resource(dd->pipe, resource);
+}
+
+/* Pure pass-through: report the wrapped context's device reset status. */
+static enum pipe_reset_status
+dd_context_get_device_reset_status(struct pipe_context *_pipe)
+{
+   struct dd_context *dd = dd_context(_pipe);
+
+   return dd->pipe->get_device_reset_status(dd->pipe);
+}
+
+/* Pure pass-through: ask the wrapped context to write its driver-specific
+ * debug state to "stream".
+ *
+ * Note: the hook returns void, so the call must not be used as the operand
+ * of a return statement (ISO C forbids "return expr;" in a void function).
+ */
+static void
+dd_context_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
+                            unsigned flags)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+
+   pipe->dump_debug_state(pipe, stream, flags);
+}
+
+/**
+ * Wrap an existing pipe_context in a newly allocated dd_context.
+ *
+ * \param dscreen  the dd_screen the new context will report as its screen
+ * \param pipe     the driver context to wrap; NULL is passed through as NULL
+ * \return the wrapper's base pipe_context, or NULL on failure
+ *
+ * If the wrapper cannot be allocated, "pipe" is destroyed before returning
+ * NULL, so the caller must not use it afterwards.
+ */
+struct pipe_context *
+dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe)
+{
+   struct dd_context *dctx;
+
+   if (!pipe)
+      return NULL;
+
+   dctx = CALLOC_STRUCT(dd_context);
+   if (!dctx) {
+      /* The wrapped context cannot be handed back, so release it here. */
+      pipe->destroy(pipe);
+      return NULL;
+   }
+
+   dctx->pipe = pipe;
+   dctx->base.priv = pipe->priv; /* expose wrapped priv data */
+   dctx->base.screen = &dscreen->base;
+
+   dctx->base.destroy = dd_context_destroy;
+
+   /* NOTE(review): CTX_INIT is defined outside this view; presumably it
+    * installs the matching dd_context_<name> wrapper into dctx->base —
+    * confirm against the macro definition. */
+   CTX_INIT(render_condition);
+   CTX_INIT(create_query);
+   CTX_INIT(destroy_query);
+   CTX_INIT(begin_query);
+   CTX_INIT(end_query);
+   CTX_INIT(get_query_result);
+   CTX_INIT(create_blend_state);
+   CTX_INIT(bind_blend_state);
+   CTX_INIT(delete_blend_state);
+   CTX_INIT(create_sampler_state);
+   CTX_INIT(bind_sampler_states);
+   CTX_INIT(delete_sampler_state);
+   CTX_INIT(create_rasterizer_state);
+   CTX_INIT(bind_rasterizer_state);
+   CTX_INIT(delete_rasterizer_state);
+   CTX_INIT(create_depth_stencil_alpha_state);
+   CTX_INIT(bind_depth_stencil_alpha_state);
+   CTX_INIT(delete_depth_stencil_alpha_state);
+   CTX_INIT(create_fs_state);
+   CTX_INIT(bind_fs_state);
+   CTX_INIT(delete_fs_state);
+   CTX_INIT(create_vs_state);
+   CTX_INIT(bind_vs_state);
+   CTX_INIT(delete_vs_state);
+   CTX_INIT(create_gs_state);
+   CTX_INIT(bind_gs_state);
+   CTX_INIT(delete_gs_state);
+   CTX_INIT(create_tcs_state);
+   CTX_INIT(bind_tcs_state);
+   CTX_INIT(delete_tcs_state);
+   CTX_INIT(create_tes_state);
+   CTX_INIT(bind_tes_state);
+   CTX_INIT(delete_tes_state);
+   CTX_INIT(create_vertex_elements_state);
+   CTX_INIT(bind_vertex_elements_state);
+   CTX_INIT(delete_vertex_elements_state);
+   CTX_INIT(set_blend_color);
+   CTX_INIT(set_stencil_ref);
+   CTX_INIT(set_sample_mask);
+   CTX_INIT(set_min_samples);
+   CTX_INIT(set_clip_state);
+   CTX_INIT(set_constant_buffer);
+   CTX_INIT(set_framebuffer_state);
+   CTX_INIT(set_polygon_stipple);
+   CTX_INIT(set_scissor_states);
+   CTX_INIT(set_viewport_states);
+   CTX_INIT(set_sampler_views);
+   CTX_INIT(set_tess_state);
+   CTX_INIT(set_shader_buffers);
+   CTX_INIT(set_shader_images);
+   CTX_INIT(set_vertex_buffers);
+   CTX_INIT(set_index_buffer);
+   CTX_INIT(create_stream_output_target);
+   CTX_INIT(stream_output_target_destroy);
+   CTX_INIT(set_stream_output_targets);
+   CTX_INIT(create_sampler_view);
+   CTX_INIT(sampler_view_destroy);
+   CTX_INIT(create_surface);
+   CTX_INIT(surface_destroy);
+   CTX_INIT(create_image_view);
+   CTX_INIT(image_view_destroy);
+   CTX_INIT(transfer_map);
+   CTX_INIT(transfer_flush_region);
+   CTX_INIT(transfer_unmap);
+   CTX_INIT(transfer_inline_write);
+   CTX_INIT(texture_barrier);
+   CTX_INIT(memory_barrier);
+   /* The hooks listed in the comments below are not wrapped here. */
+   /* create_video_codec */
+   /* create_video_buffer */
+   /* create_compute_state */
+   /* bind_compute_state */
+   /* delete_compute_state */
+   /* set_compute_resources */
+   /* set_global_binding */
+   CTX_INIT(get_sample_position);
+   CTX_INIT(invalidate_resource);
+   CTX_INIT(get_device_reset_status);
+   CTX_INIT(dump_debug_state);
+
+   /* Flush, draw, copy, blit and clear hooks are set up in dd_draw.c. */
+   dd_init_draw_functions(dctx);
+
+   /* Initial shadow value for the sample mask: all bits set. */
+   dctx->sample_mask = ~0;
+   return &dctx->base;
+}
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -0,0 +1,784 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dd_pipe.h"
+
+#include "util/u_dump.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_scan.h"
+
+
+/* Kind of context call recorded in a struct dd_call; dd_dump_call()
+ * switches on this value to pick the matching dump routine. */
+enum call_type
+{
+   CALL_DRAW_VBO,
+   CALL_RESOURCE_COPY_REGION,
+   CALL_BLIT,
+   CALL_FLUSH_RESOURCE,
+   CALL_CLEAR,
+   CALL_CLEAR_BUFFER,
+   CALL_CLEAR_RENDER_TARGET,
+   CALL_CLEAR_DEPTH_STENCIL,
+};
+
+/* Arguments of a resource_copy_region call, saved for dumping. */
+struct call_resource_copy_region
+{
+   struct pipe_resource *dst;
+   unsigned dst_level;
+   unsigned dstx, dsty, dstz;
+   struct pipe_resource *src;
+   unsigned src_level;
+   /* The caller's pointer is stored as-is; the box itself is not copied. */
+   const struct pipe_box *src_box;
+};
+
+/* Arguments of a clear call, saved for dumping. */
+struct call_clear
+{
+   unsigned buffers;
+   /* The caller's pointer is stored as-is; the color is not copied. */
+   const union pipe_color_union *color;
+   double depth;
+   unsigned stencil;
+};
+
+/* Arguments of a clear_buffer call, saved for dumping. */
+struct call_clear_buffer
+{
+   struct pipe_resource *res;
+   unsigned offset;
+   unsigned size;
+   /* The caller's pointer is stored as-is; clear_value_size bytes are read
+    * from it when the call is dumped. */
+   const void *clear_value;
+   int clear_value_size;
+};
+
+/* One recorded context call: a type tag plus the saved arguments for that
+ * type. CALL_CLEAR_RENDER_TARGET and CALL_CLEAR_DEPTH_STENCIL have no
+ * member in the union; their dump routines take no saved arguments. */
+struct dd_call
+{
+   enum call_type type;
+
+   union {
+      struct pipe_draw_info draw_vbo;
+      struct call_resource_copy_region resource_copy_region;
+      struct pipe_blit_info blit;
+      struct pipe_resource *flush_resource;
+      struct call_clear clear;
+      struct call_clear_buffer clear_buffer;
+   } info;
+};
+
+/* Obtain a debug file from dd_get_debug_file() and write a short header
+ * naming the driver vendor, device vendor and device. Returns NULL when no
+ * file could be obtained. */
+static FILE *
+dd_get_file_stream(struct dd_context *dctx)
+{
+   FILE *out = dd_get_debug_file();
+
+   if (out) {
+      struct pipe_screen *drv = dctx->pipe->screen;
+
+      fprintf(out, "Driver vendor: %s\n", drv->get_vendor(drv));
+      fprintf(out, "Device vendor: %s\n", drv->get_device_vendor(drv));
+      fprintf(out, "Device name: %s\n\n", drv->get_name(drv));
+   }
+
+   return out;
+}
+
+/* Counterpart of dd_get_file_stream(): close the debug file. The fclose()
+ * result is deliberately ignored. */
+static void
+dd_close_file_stream(FILE *f)
+{
+   (void) fclose(f);
+}
+
+/* Return how many viewports are worth dumping: PIPE_MAX_VIEWPORTS if the
+ * bound geometry shader (or, failing that, the tess-eval shader, or failing
+ * that, the vertex shader) writes the viewport index, otherwise 1. Also 1
+ * when none of those shaders is bound. */
+static unsigned
+dd_num_active_viewports(struct dd_context *dctx)
+{
+   struct tgsi_shader_info scan;
+   unsigned stage;
+
+   /* Pick the first bound stage in GS > TES > VS priority order. */
+   if (dctx->shaders[PIPE_SHADER_GEOMETRY])
+      stage = PIPE_SHADER_GEOMETRY;
+   else if (dctx->shaders[PIPE_SHADER_TESS_EVAL])
+      stage = PIPE_SHADER_TESS_EVAL;
+   else if (dctx->shaders[PIPE_SHADER_VERTEX])
+      stage = PIPE_SHADER_VERTEX;
+   else
+      return 1;
+
+   tgsi_scan_shader(dctx->shaders[stage]->state.shader.tokens, &scan);
+
+   if (scan.writes_viewport_index)
+      return PIPE_MAX_VIEWPORTS;
+   return 1;
+}
+
+/* ANSI escape sequences used to colorize the dump output. */
+#define COLOR_RESET	"\033[0m"
+#define COLOR_SHADER	"\033[1;32m"
+#define COLOR_STATE	"\033[1;33m"
+
+/* All DUMP* macros below write to a FILE* that must be in scope under the
+ * name "f", and dispatch to util_dump_<name>() by token pasting. */
+
+/* Print "<name>: " (colored), then the dumped value, then a newline. */
+#define DUMP(name, var) do { \
+   fprintf(f, COLOR_STATE #name ": " COLOR_RESET); \
+   util_dump_##name(f, var); \
+   fprintf(f, "\n"); \
+} while(0)
+
+/* Same as DUMP, but the label also carries the index "i". */
+#define DUMP_I(name, var, i) do { \
+   fprintf(f, COLOR_STATE #name " %i: " COLOR_RESET, i); \
+   util_dump_##name(f, var); \
+   fprintf(f, "\n"); \
+} while(0)
+
+/* Print an indented "<member>: " label and dump (var)->member by value. */
+#define DUMP_M(name, var, member) do { \
+   fprintf(f, "  " #member ": "); \
+   util_dump_##name(f, (var)->member); \
+   fprintf(f, "\n"); \
+} while(0)
+
+/* Same as DUMP_M, but passes the address of (var)->member to the dumper. */
+#define DUMP_M_ADDR(name, var, member) do { \
+   fprintf(f, "  " #member ": "); \
+   util_dump_##name(f, &(var)->member); \
+   fprintf(f, "\n"); \
+} while(0)
+
+/* Write "<name> = <value>" on its own line, with the name colorized and the
+ * value printed as a signed decimal. */
+static void
+print_named_value(FILE *f, const char *name, int value)
+{
+   fputs(COLOR_STATE, f);
+   fputs(name, f);
+   fprintf(f, COLOR_RESET " = %i\n", value);
+}
+
+/* Write "<name> = 0x<value>" on its own line, with the name colorized and
+ * the value printed as zero-padded 8-digit hexadecimal. */
+static void
+print_named_xvalue(FILE *f, const char *name, int value)
+{
+   fputs(COLOR_STATE, f);
+   fputs(name, f);
+   fprintf(f, COLOR_RESET " = 0x%08x\n", value);
+}
+
+/* Dumper used via DUMP_M(uint, ...): print an unsigned value in decimal. */
+static void
+util_dump_uint(FILE *f, unsigned i)
+{
+   fprintf(f, "%u", i);
+}
+
+/* Dumper used via DUMP_M(hex, ...): print an unsigned value as 0x-prefixed
+ * hexadecimal. */
+static void
+util_dump_hex(FILE *f, unsigned i)
+{
+   fprintf(f, "0x%x", i);
+}
+
+/* Dumper used via DUMP_M(double, ...): print a double in "%f" notation. */
+static void
+util_dump_double(FILE *f, double d)
+{
+   fprintf(f, "%f", d);
+}
+
+/* Dumper used via DUMP_M(format, ...): print the name of a pipe_format as
+ * returned by util_format_name(). */
+static void
+util_dump_format(FILE *f, enum pipe_format format)
+{
+   fprintf(f, "%s", util_format_name(format));
+}
+
+/* Dumper used via DUMP_M(color_union, ...): print the four components of a
+ * color union both as floats and as unsigned integers, in the form
+ * "{f = {...}, ui = {...}}".
+ *
+ * The outer brace opened at the start of the output is closed at the end so
+ * the printed text is balanced.
+ */
+static void
+util_dump_color_union(FILE *f, const union pipe_color_union *color)
+{
+   fprintf(f, "{f = {%f, %f, %f, %f}, ui = {%u, %u, %u, %u}}",
+           color->f[0], color->f[1], color->f[2], color->f[3],
+           color->ui[0], color->ui[1], color->ui[2], color->ui[3]);
+}
+
+/* Dumper used via DUMP_M(query, ...): print the query type by name, or as
+ * an offset from PIPE_QUERY_DRIVER_SPECIFIC for driver-specific types. */
+static void
+util_dump_query(FILE *f, struct dd_query *query)
+{
+   if (query->type < PIPE_QUERY_DRIVER_SPECIFIC) {
+      fprintf(f, "%s", util_dump_query_type(query->type, false));
+      return;
+   }
+
+   fprintf(f, "PIPE_QUERY_DRIVER_SPECIFIC + %i",
+           query->type - PIPE_QUERY_DRIVER_SPECIFIC);
+}
+
+/* Print the recorded render condition (query, condition, mode) followed by
+ * a blank line. Nothing is printed when no render-condition query is set. */
+static void
+dd_dump_render_condition(struct dd_context *dctx, FILE *f)
+{
+   if (!dctx->render_cond.query)
+      return;
+
+   fprintf(f, "render condition:\n");
+   DUMP_M(query, &dctx->render_cond, query);
+   DUMP_M(uint, &dctx->render_cond, condition);
+   DUMP_M(uint, &dctx->render_cond, mode);
+   fprintf(f, "\n");
+}
+
+/**
+ * Dump a draw call together with the state recorded in the dd_context.
+ *
+ * Output order: draw info (plus index buffer, stream-output count source
+ * and indirect buffer when used), render condition, vertex buffers, vertex
+ * elements, stream-output targets, per-shader-stage state, depth/stencil,
+ * blend, sample state and finally the framebuffer.
+ *
+ * \param dctx  context whose recorded state is dumped
+ * \param info  the draw parameters
+ * \param f     output stream (also used implicitly by the DUMP* macros)
+ */
+static void
+dd_dump_draw_vbo(struct dd_context *dctx, struct pipe_draw_info *info, FILE *f)
+{
+   int sh, i;
+   const char *shader_str[PIPE_SHADER_TYPES];
+
+   /* Human-readable stage names, indexed by PIPE_SHADER_*. */
+   shader_str[PIPE_SHADER_VERTEX] = "VERTEX";
+   shader_str[PIPE_SHADER_TESS_CTRL] = "TESS_CTRL";
+   shader_str[PIPE_SHADER_TESS_EVAL] = "TESS_EVAL";
+   shader_str[PIPE_SHADER_GEOMETRY] = "GEOMETRY";
+   shader_str[PIPE_SHADER_FRAGMENT] = "FRAGMENT";
+   shader_str[PIPE_SHADER_COMPUTE] = "COMPUTE";
+
+   DUMP(draw_info, info);
+   /* The index buffer is only dumped for indexed draws. */
+   if (info->indexed) {
+      DUMP(index_buffer, &dctx->index_buffer);
+      if (dctx->index_buffer.buffer)
+         DUMP_M(resource, &dctx->index_buffer, buffer);
+   }
+   if (info->count_from_stream_output)
+      DUMP_M(stream_output_target, info,
+             count_from_stream_output);
+   if (info->indirect)
+      DUMP_M(resource, info, indirect);
+   fprintf(f, "\n");
+
+   /* TODO: dump active queries */
+
+   dd_dump_render_condition(dctx, f);
+
+   /* Only vertex buffer slots with a resource or a user pointer are shown. */
+   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
+      if (dctx->vertex_buffers[i].buffer ||
+          dctx->vertex_buffers[i].user_buffer) {
+         DUMP_I(vertex_buffer, &dctx->vertex_buffers[i], i);
+         if (dctx->vertex_buffers[i].buffer)
+            DUMP_M(resource, &dctx->vertex_buffers[i], buffer);
+      }
+
+   if (dctx->velems) {
+      print_named_value(f, "num vertex elements",
+                        dctx->velems->state.velems.count);
+      for (i = 0; i < dctx->velems->state.velems.count; i++) {
+         fprintf(f, "  ");
+         DUMP_I(vertex_element, &dctx->velems->state.velems.velems[i], i);
+      }
+   }
+
+   print_named_value(f, "num stream output targets", dctx->num_so_targets);
+   for (i = 0; i < dctx->num_so_targets; i++)
+      if (dctx->so_targets[i]) {
+         DUMP_I(stream_output_target, dctx->so_targets[i], i);
+         DUMP_M(resource, dctx->so_targets[i], buffer);
+         fprintf(f, "  offset = %i\n", dctx->so_offsets[i]);
+      }
+
+   fprintf(f, "\n");
+   for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) {
+      /* Compute state is not part of a draw call. */
+      if (sh == PIPE_SHADER_COMPUTE)
+         continue;
+
+      /* The default tessellation levels are printed only when a tess-eval
+       * shader is bound without a tess-control shader. */
+      if (sh == PIPE_SHADER_TESS_CTRL &&
+          !dctx->shaders[PIPE_SHADER_TESS_CTRL] &&
+          dctx->shaders[PIPE_SHADER_TESS_EVAL])
+         fprintf(f, "tess_state: {default_outer_level = {%f, %f, %f, %f}, "
+                 "default_inner_level = {%f, %f}}\n",
+                 dctx->tess_default_levels[0],
+                 dctx->tess_default_levels[1],
+                 dctx->tess_default_levels[2],
+                 dctx->tess_default_levels[3],
+                 dctx->tess_default_levels[4],
+                 dctx->tess_default_levels[5]);
+
+      /* Rasterizer-related state is emitted right before the fragment
+       * shader section, and only if a rasterizer state is bound. */
+      if (sh == PIPE_SHADER_FRAGMENT)
+         if (dctx->rs) {
+            unsigned num_viewports = dd_num_active_viewports(dctx);
+
+            if (dctx->rs->state.rs.clip_plane_enable)
+               DUMP(clip_state, &dctx->clip_state);
+
+            for (i = 0; i < num_viewports; i++)
+               DUMP_I(viewport_state, &dctx->viewports[i], i);
+
+            if (dctx->rs->state.rs.scissor)
+               for (i = 0; i < num_viewports; i++)
+                  DUMP_I(scissor_state, &dctx->scissors[i], i);
+
+            DUMP(rasterizer_state, &dctx->rs->state.rs);
+
+            if (dctx->rs->state.rs.poly_stipple_enable)
+               DUMP(poly_stipple, &dctx->polygon_stipple);
+            fprintf(f, "\n");
+         }
+
+      /* Stages without a bound shader produce no per-stage section. */
+      if (!dctx->shaders[sh])
+         continue;
+
+      fprintf(f, COLOR_SHADER "begin shader: %s" COLOR_RESET "\n", shader_str[sh]);
+      DUMP(shader_state, &dctx->shaders[sh]->state.shader);
+
+      for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++)
+         if (dctx->constant_buffers[sh][i].buffer ||
+             dctx->constant_buffers[sh][i].user_buffer) {
+            DUMP_I(constant_buffer, &dctx->constant_buffers[sh][i], i);
+            if (dctx->constant_buffers[sh][i].buffer)
+               DUMP_M(resource, &dctx->constant_buffers[sh][i], buffer);
+         }
+
+      for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+         if (dctx->sampler_states[sh][i])
+            DUMP_I(sampler_state, &dctx->sampler_states[sh][i]->state.sampler, i);
+
+      for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
+         if (dctx->sampler_views[sh][i]) {
+            DUMP_I(sampler_view, dctx->sampler_views[sh][i], i);
+            DUMP_M(resource, dctx->sampler_views[sh][i], texture);
+         }
+
+      /* TODO: print shader images */
+      /* TODO: print shader buffers */
+
+      fprintf(f, COLOR_SHADER "end shader: %s" COLOR_RESET "\n\n", shader_str[sh]);
+   }
+
+   /* stencil_ref and blend_color are dumped unconditionally; the CSO-based
+    * states only when one is bound. */
+   if (dctx->dsa)
+      DUMP(depth_stencil_alpha_state, &dctx->dsa->state.dsa);
+   DUMP(stencil_ref, &dctx->stencil_ref);
+
+   if (dctx->blend)
+      DUMP(blend_state, &dctx->blend->state.blend);
+   DUMP(blend_color, &dctx->blend_color);
+
+   print_named_value(f, "min_samples", dctx->min_samples);
+   print_named_xvalue(f, "sample_mask", dctx->sample_mask);
+   fprintf(f, "\n");
+
+   /* Framebuffer: each bound color buffer, then the depth/stencil buffer,
+    * each with its surface and the underlying texture resource. */
+   DUMP(framebuffer_state, &dctx->framebuffer_state);
+   for (i = 0; i < dctx->framebuffer_state.nr_cbufs; i++)
+      if (dctx->framebuffer_state.cbufs[i]) {
+         fprintf(f, "  " COLOR_STATE "cbufs[%i]:" COLOR_RESET "\n    ", i);
+         DUMP(surface, dctx->framebuffer_state.cbufs[i]);
+         fprintf(f, "    ");
+         DUMP(resource, dctx->framebuffer_state.cbufs[i]->texture);
+      }
+   if (dctx->framebuffer_state.zsbuf) {
+      fprintf(f, "  " COLOR_STATE "zsbuf:" COLOR_RESET "\n    ");
+      DUMP(surface, dctx->framebuffer_state.zsbuf);
+      fprintf(f, "    ");
+      DUMP(resource, dctx->framebuffer_state.zsbuf->texture);
+   }
+   fprintf(f, "\n");
+}
+
+/* Dump the saved arguments of a resource_copy_region call to "f". */
+static void
+dd_dump_resource_copy_region(struct dd_context *dctx,
+                             struct call_resource_copy_region *info,
+                             FILE *f)
+{
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, dst);
+   DUMP_M(uint, info, dst_level);
+   DUMP_M(uint, info, dstx);
+   DUMP_M(uint, info, dsty);
+   DUMP_M(uint, info, dstz);
+   DUMP_M(resource, info, src);
+   DUMP_M(uint, info, src_level);
+   DUMP_M(box, info, src_box);
+}
+
+/* Dump the saved pipe_blit_info of a blit call to "f"; the recorded render
+ * condition is appended when the blit has render_condition_enable set. */
+static void
+dd_dump_blit(struct dd_context *dctx, struct pipe_blit_info *info, FILE *f)
+{
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, dst.resource);
+   DUMP_M(uint, info, dst.level);
+   DUMP_M_ADDR(box, info, dst.box);
+   DUMP_M(format, info, dst.format);
+
+   DUMP_M(resource, info, src.resource);
+   DUMP_M(uint, info, src.level);
+   DUMP_M_ADDR(box, info, src.box);
+   DUMP_M(format, info, src.format);
+
+   DUMP_M(hex, info, mask);
+   DUMP_M(uint, info, filter);
+   DUMP_M(uint, info, scissor_enable);
+   DUMP_M_ADDR(scissor_state, info, scissor);
+   DUMP_M(uint, info, render_condition_enable);
+
+   if (info->render_condition_enable)
+      dd_dump_render_condition(dctx, f);
+}
+
+/* Dump the resource passed to a flush_resource call to "f". */
+static void
+dd_dump_flush_resource(struct dd_context *dctx, struct pipe_resource *res,
+                       FILE *f)
+{
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP(resource, res);
+}
+
+/* Dump the saved arguments of a clear call to "f". info->color is
+ * dereferenced by the color_union dumper. */
+static void
+dd_dump_clear(struct dd_context *dctx, struct call_clear *info, FILE *f)
+{
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(uint, info, buffers);
+   DUMP_M(color_union, info, color);
+   DUMP_M(double, info, depth);
+   DUMP_M(hex, info, stencil);
+}
+
+/* Dump the saved arguments of a clear_buffer call to "f", followed by the
+ * clear value as a sequence of two-digit hexadecimal bytes. */
+static void
+dd_dump_clear_buffer(struct dd_context *dctx, struct call_clear_buffer *info,
+                     FILE *f)
+{
+   int i;
+   /* Read the bytes as unsigned char: with a plain (possibly signed) char,
+    * bytes >= 0x80 would be sign-extended by the default argument
+    * promotions and printed by "%02x" as ffffffXX instead of XX. */
+   const unsigned char *value = (const unsigned char*)info->clear_value;
+
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   DUMP_M(resource, info, res);
+   DUMP_M(uint, info, offset);
+   DUMP_M(uint, info, size);
+   DUMP_M(uint, info, clear_value_size);
+
+   fprintf(f, "  clear_value:");
+   for (i = 0; i < info->clear_value_size; i++)
+      fprintf(f, " %02x", value[i]);
+   fprintf(f, "\n");
+}
+
+/* Dump a clear_render_target call to "f". Only the heading is printed so
+ * far; the call's arguments are not recorded. */
+static void
+dd_dump_clear_render_target(struct dd_context *dctx, FILE *f)
+{
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   /* TODO */
+}
+
+/* Dump a clear_depth_stencil call to "f". Only the heading is printed so
+ * far; the call's arguments are not recorded. */
+static void
+dd_dump_clear_depth_stencil(struct dd_context *dctx, FILE *f)
+{
+   /* __func__+8 skips the 8-character "dd_dump_" prefix of this function's
+    * name, leaving the call name as the heading. */
+   fprintf(f, "%s:\n", __func__+8);
+   /* TODO */
+}
+
+/* If the wrapped driver implements dump_debug_state, print a separator and
+ * heading and let the driver append its own state to "f". Drivers without
+ * the hook produce no output here. */
+static void
+dd_dump_driver_state(struct dd_context *dctx, FILE *f, unsigned flags)
+{
+   struct pipe_context *wrapped = dctx->pipe;
+
+   if (!wrapped->dump_debug_state)
+      return;
+
+   fprintf(f,"\n\n**************************************************"
+             "***************************\n");
+   fprintf(f, "Driver-specific state:\n\n");
+   wrapped->dump_debug_state(wrapped, f, flags);
+}
+
+/* Write one recorded call to a fresh debug file: the call-specific dump
+ * selected by call->type, followed by the driver-specific state. Does
+ * nothing if no debug file can be obtained. */
+static void
+dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags)
+{
+   FILE *out = dd_get_file_stream(dctx);
+
+   if (out == NULL)
+      return;
+
+   switch (call->type) {
+   case CALL_DRAW_VBO:
+      dd_dump_draw_vbo(dctx, &call->info.draw_vbo, out);
+      break;
+
+   case CALL_RESOURCE_COPY_REGION:
+      dd_dump_resource_copy_region(dctx, &call->info.resource_copy_region,
+                                   out);
+      break;
+
+   case CALL_BLIT:
+      dd_dump_blit(dctx, &call->info.blit, out);
+      break;
+
+   case CALL_FLUSH_RESOURCE:
+      dd_dump_flush_resource(dctx, call->info.flush_resource, out);
+      break;
+
+   case CALL_CLEAR:
+      dd_dump_clear(dctx, &call->info.clear, out);
+      break;
+
+   case CALL_CLEAR_BUFFER:
+      dd_dump_clear_buffer(dctx, &call->info.clear_buffer, out);
+      break;
+
+   case CALL_CLEAR_RENDER_TARGET:
+      dd_dump_clear_render_target(dctx, out);
+      break;
+
+   case CALL_CLEAR_DEPTH_STENCIL:
+      dd_dump_clear_depth_stencil(dctx, out);
+      break;
+   }
+
+   dd_dump_driver_state(dctx, out, flags);
+   dd_close_file_stream(out);
+}
+
+/* Terminate the process: sync(), announce the abort on stderr, flush the
+ * standard streams and call abort(). */
+static void
+dd_kill_process(void)
+{
+   sync();
+   fputs("dd: Aborting the process...\n", stderr);
+   fflush(stdout);
+   fflush(stderr);
+   abort();
+}
+
+/**
+ * Flush the wrapped context and wait for the resulting fence.
+ *
+ * \param dctx         context to flush
+ * \param flush_fence  if non-NULL, receives a reference to the flush fence
+ * \param flush_flags  flags forwarded to pipe->flush
+ * \return true if the fence did not signal within the screen's timeout
+ *         (reported as a GPU hang on stderr); false otherwise, including
+ *         when the flush produced no fence.
+ */
+static bool
+dd_flush_and_check_hang(struct dd_context *dctx,
+                        struct pipe_fence_handle **flush_fence,
+                        unsigned flush_flags)
+{
+   struct pipe_context *wrapped = dctx->pipe;
+   struct pipe_screen *drv_screen = wrapped->screen;
+   struct pipe_fence_handle *pending = NULL;
+   uint64_t timeout_ms = dd_screen(dctx->base.screen)->timeout_ms;
+   bool hung;
+
+   assert(timeout_ms > 0);
+
+   wrapped->flush(wrapped, &pending, flush_flags);
+
+   /* Hand the caller its own reference before ours is dropped below. */
+   if (flush_fence)
+      drv_screen->fence_reference(drv_screen, flush_fence, pending);
+
+   if (pending == NULL)
+      return false;
+
+   /* The 64-bit timeout is scaled by 1000000 before being passed on. */
+   hung = !drv_screen->fence_finish(drv_screen, pending,
+                                    timeout_ms * 1000000);
+   drv_screen->fence_reference(drv_screen, &pending, NULL);
+
+   if (hung)
+      fprintf(stderr, "dd: GPU hang detected!\n");
+
+   return hung;
+}
+
+/* Flush and check for a hang. On a hang, write "cause" and the driver state
+ * (flagged PIPE_DEBUG_DEVICE_IS_HUNG) to a debug file if one is available,
+ * then kill the process. Returns normally when no hang was detected. */
+static void
+dd_flush_and_handle_hang(struct dd_context *dctx,
+                         struct pipe_fence_handle **fence, unsigned flags,
+                         const char *cause)
+{
+   FILE *out;
+
+   if (!dd_flush_and_check_hang(dctx, fence, flags))
+      return;
+
+   out = dd_get_file_stream(dctx);
+   if (out != NULL) {
+      fprintf(out, "dd: %s.\n", cause);
+      dd_dump_driver_state(dctx, out, PIPE_DEBUG_DEVICE_IS_HUNG);
+      dd_close_file_stream(out);
+   }
+
+   /* Terminate the process to prevent future hangs. */
+   dd_kill_process();
+}
+
+/* pipe_context::flush wrapper. In hang-detection mode the flush is followed
+ * by a hang check; in dump-all-calls mode it is forwarded unchanged. Any
+ * other mode trips an assertion. */
+static void
+dd_context_flush(struct pipe_context *_pipe,
+                 struct pipe_fence_handle **fence, unsigned flags)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   enum dd_mode mode = dd_screen(dd->base.screen)->mode;
+
+   if (mode == DD_DETECT_HANGS) {
+      dd_flush_and_handle_hang(dd, fence, flags,
+                               "GPU hang detected in pipe->flush()");
+   } else if (mode == DD_DUMP_ALL_CALLS) {
+      dd->pipe->flush(dd->pipe, fence, flags);
+   } else {
+      assert(0);
+   }
+}
+
+/* Hook run before each wrapped draw-like call. In hang-detection mode with
+ * flushing enabled, flush first so that a hang already pending at this point
+ * is reported separately from the upcoming call. */
+static void
+dd_before_draw(struct dd_context *dctx)
+{
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+
+   if (dscreen->mode != DD_DETECT_HANGS)
+      return;
+   if (dscreen->no_flush)
+      return;
+
+   dd_flush_and_handle_hang(dctx, NULL, 0,
+                            "GPU hang most likely caused by internal "
+                            "driver commands");
+}
+
+/* Hook run after each wrapped draw-like call. In hang-detection mode (with
+ * flushing enabled) the context is flushed and, if a hang is detected, the
+ * call is dumped and the process killed. In dump-all-calls mode every call
+ * is dumped. Any other mode trips an assertion. */
+static void
+dd_after_draw(struct dd_context *dctx, struct dd_call *call)
+{
+   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+
+   if (dscreen->mode == DD_DETECT_HANGS) {
+      if (!dscreen->no_flush && dd_flush_and_check_hang(dctx, NULL, 0)) {
+         dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+
+         /* Terminate the process to prevent future hangs. */
+         dd_kill_process();
+      }
+   } else if (dscreen->mode == DD_DUMP_ALL_CALLS) {
+      dd_dump_call(dctx, call, 0);
+   } else {
+      assert(0);
+   }
+}
+
+/* pipe_context::draw_vbo wrapper: record a copy of the draw info, then run
+ * the before-draw hook, the wrapped call and the after-draw hook. */
+static void
+dd_context_draw_vbo(struct pipe_context *_pipe,
+                    const struct pipe_draw_info *info)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+
+   record.type = CALL_DRAW_VBO;
+   record.info.draw_vbo = *info;
+
+   dd_before_draw(dd);
+   wrapped->draw_vbo(wrapped, info);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::resource_copy_region wrapper: record the arguments, then
+ * run the before-draw hook, the wrapped call and the after-draw hook. */
+static void
+dd_context_resource_copy_region(struct pipe_context *_pipe,
+                                struct pipe_resource *dst, unsigned dst_level,
+                                unsigned dstx, unsigned dsty, unsigned dstz,
+                                struct pipe_resource *src, unsigned src_level,
+                                const struct pipe_box *src_box)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+   struct call_resource_copy_region *args = &record.info.resource_copy_region;
+
+   record.type = CALL_RESOURCE_COPY_REGION;
+   args->dst = dst;
+   args->dst_level = dst_level;
+   args->dstx = dstx;
+   args->dsty = dsty;
+   args->dstz = dstz;
+   args->src = src;
+   args->src_level = src_level;
+   args->src_box = src_box;
+
+   dd_before_draw(dd);
+   wrapped->resource_copy_region(wrapped,
+                                 dst, dst_level, dstx, dsty, dstz,
+                                 src, src_level, src_box);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::blit wrapper: record a copy of the blit info, then run the
+ * before-draw hook, the wrapped call and the after-draw hook. */
+static void
+dd_context_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+
+   record.type = CALL_BLIT;
+   record.info.blit = *info;
+
+   dd_before_draw(dd);
+   wrapped->blit(wrapped, info);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::flush_resource wrapper: record the resource, then run the
+ * before-draw hook, the wrapped call and the after-draw hook. */
+static void
+dd_context_flush_resource(struct pipe_context *_pipe,
+                          struct pipe_resource *resource)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+
+   record.type = CALL_FLUSH_RESOURCE;
+   record.info.flush_resource = resource;
+
+   dd_before_draw(dd);
+   wrapped->flush_resource(wrapped, resource);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::clear wrapper: record the arguments (the color pointer is
+ * stored, not copied), then run the before-draw hook, the wrapped call and
+ * the after-draw hook. */
+static void
+dd_context_clear(struct pipe_context *_pipe, unsigned buffers,
+                 const union pipe_color_union *color, double depth,
+                 unsigned stencil)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+   struct call_clear *args = &record.info.clear;
+
+   record.type = CALL_CLEAR;
+   args->buffers = buffers;
+   args->color = color;
+   args->depth = depth;
+   args->stencil = stencil;
+
+   dd_before_draw(dd);
+   wrapped->clear(wrapped, buffers, color, depth, stencil);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::clear_render_target wrapper. Only the call type is
+ * recorded (no arguments), then the before-draw hook, the wrapped call and
+ * the after-draw hook run in order. */
+static void
+dd_context_clear_render_target(struct pipe_context *_pipe,
+                               struct pipe_surface *dst,
+                               const union pipe_color_union *color,
+                               unsigned dstx, unsigned dsty,
+                               unsigned width, unsigned height)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+
+   record.type = CALL_CLEAR_RENDER_TARGET;
+
+   dd_before_draw(dd);
+   wrapped->clear_render_target(wrapped, dst, color, dstx, dsty,
+                                width, height);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::clear_depth_stencil wrapper. Only the call type is
+ * recorded (no arguments), then the before-draw hook, the wrapped call and
+ * the after-draw hook run in order. */
+static void
+dd_context_clear_depth_stencil(struct pipe_context *_pipe,
+                               struct pipe_surface *dst, unsigned clear_flags,
+                               double depth, unsigned stencil, unsigned dstx,
+                               unsigned dsty, unsigned width, unsigned height)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+
+   record.type = CALL_CLEAR_DEPTH_STENCIL;
+
+   dd_before_draw(dd);
+   wrapped->clear_depth_stencil(wrapped, dst, clear_flags, depth, stencil,
+                                dstx, dsty, width, height);
+   dd_after_draw(dd, &record);
+}
+
+/* pipe_context::clear_buffer wrapper: record the arguments (the clear value
+ * pointer is stored, not copied), then run the before-draw hook, the
+ * wrapped call and the after-draw hook. */
+static void
+dd_context_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
+                        unsigned offset, unsigned size,
+                        const void *clear_value, int clear_value_size)
+{
+   struct dd_context *dd = dd_context(_pipe);
+   struct pipe_context *wrapped = dd->pipe;
+   struct dd_call record;
+   struct call_clear_buffer *args = &record.info.clear_buffer;
+
+   record.type = CALL_CLEAR_BUFFER;
+   args->res = res;
+   args->offset = offset;
+   args->size = size;
+   args->clear_value = clear_value;
+   args->clear_value_size = clear_value_size;
+
+   dd_before_draw(dd);
+   wrapped->clear_buffer(wrapped, res, offset, size, clear_value,
+                         clear_value_size);
+   dd_after_draw(dd, &record);
+}
+
+/* Set up the flush, draw, copy, blit and clear hooks of a dd_context; the
+ * corresponding dd_context_* wrappers are defined in this file.
+ *
+ * NOTE(review): CTX_INIT is defined outside this view; presumably it
+ * installs the matching dd_context_<name> wrapper into dctx->base — confirm
+ * against the macro definition. */
+void
+dd_init_draw_functions(struct dd_context *dctx)
+{
+   CTX_INIT(flush);
+   CTX_INIT(draw_vbo);
+   CTX_INIT(resource_copy_region);
+   CTX_INIT(blit);
+   CTX_INIT(clear);
+   CTX_INIT(clear_render_target);
+   CTX_INIT(clear_depth_stencil);
+   CTX_INIT(clear_buffer);
+   CTX_INIT(flush_resource);
+   /* launch_grid */
+}
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -0,0 +1,139 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DD_H_
+#define DD_H_
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "pipe/p_screen.h"
+#include "dd_util.h"
+
+enum dd_mode {
+   DD_DETECT_HANGS,   /* used when GALLIUM_DDEBUG holds a timeout value */
+   DD_DUMP_ALL_CALLS  /* used when GALLIUM_DDEBUG is exactly "always" */
+};
+
+struct dd_screen
+{
+   struct pipe_screen base;     /* first member, so dd_screen() can cast */
+   struct pipe_screen *screen;  /* wrapped driver screen calls forward to */
+   unsigned timeout_ms;         /* timeout parsed from GALLIUM_DDEBUG */
+   enum dd_mode mode;           /* hang detection or dump-everything */
+   bool no_flush;               /* GALLIUM_DDEBUG contained "noflush" */
+};
+
+struct dd_query
+{
+   unsigned type;             /* presumably the query type - TODO confirm */
+   struct pipe_query *query;  /* presumably the driver's query - confirm */
+};
+
+struct dd_state
+{
+   void *cso;  /* presumably the driver's own state object - TODO confirm */
+   /* NOTE(review): looks like one member is valid per CSO kind - confirm */
+   union {
+      struct pipe_blend_state blend;
+      struct pipe_depth_stencil_alpha_state dsa;
+      struct pipe_rasterizer_state rs;
+      struct pipe_sampler_state sampler;
+      struct {
+         struct pipe_vertex_element velems[PIPE_MAX_ATTRIBS];
+         unsigned count;  /* presumably the used entries in velems[] */
+      } velems;
+      struct pipe_shader_state shader;
+   } state;
+};
+
+struct dd_context
+{
+   struct pipe_context base;   /* first member, so dd_context() can cast */
+   struct pipe_context *pipe;  /* wrapped driver context (see CTX_INIT) */
+   /* NOTE(review): fields below look like copies of bound state - confirm */
+   struct {
+      struct dd_query *query;
+      bool condition;
+      unsigned mode;
+   } render_cond;
+
+   struct pipe_index_buffer index_buffer;
+   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+
+   unsigned num_so_targets;
+   struct pipe_stream_output_target *so_targets[PIPE_MAX_SO_BUFFERS];
+   unsigned so_offsets[PIPE_MAX_SO_BUFFERS];
+
+   struct dd_state *shaders[PIPE_SHADER_TYPES];
+   struct pipe_constant_buffer constant_buffers[PIPE_SHADER_TYPES][PIPE_MAX_CONSTANT_BUFFERS];
+   struct pipe_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct dd_state *sampler_states[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+   struct pipe_image_view *shader_images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
+   struct pipe_shader_buffer shader_buffers[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
+
+   struct dd_state *velems;
+   struct dd_state *rs;
+   struct dd_state *dsa;
+   struct dd_state *blend;
+
+   struct pipe_blend_color blend_color;
+   struct pipe_stencil_ref stencil_ref;
+   unsigned sample_mask;
+   unsigned min_samples;
+   struct pipe_clip_state clip_state;
+   struct pipe_framebuffer_state framebuffer_state;
+   struct pipe_poly_stipple polygon_stipple;
+   struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
+   struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
+   float tess_default_levels[6];
+};
+
+
+struct pipe_context *
+dd_context_create(struct dd_screen *dscreen, struct pipe_context *pipe);
+
+void
+dd_init_draw_functions(struct dd_context *dctx);
+
+
+static inline struct dd_context *
+dd_context(struct pipe_context *pipe)
+{
+   return (struct dd_context *)pipe; /* base is dd_context's first member */
+}
+
+static inline struct dd_screen *
+dd_screen(struct pipe_screen *screen)
+{
+   return (struct dd_screen*)screen; /* base is dd_screen's first member */
+}
+
+/* Needs a local 'dctx'; wraps a hook only if the driver implements it. */
+#define CTX_INIT(_member) \
+   dctx->base._member = dctx->pipe->_member ? dd_context_##_member : NULL
+
+#endif /* DD_H_ */
--- a/src/gallium/drivers/radeonsi/si_commands.c
+++ b/src/gallium/drivers/radeonsi/si_commands.c
@@ -1,5 +1,8 @@
-/*
- * Copyright 2012 Advanced Micro Devices, Inc.
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -20,17 +23,14 @@
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
- * Authors:
- *      Christian König <christian.koenig@amd.com>
- */
+ **************************************************************************/

-#include "sid.h"
-#include "si_pipe.h"
+#ifndef DD_PUBLIC_H_
+#define DD_PUBLIC_H_

-void si_cmd_context_control(struct si_pm4_state *pm4)
-{
-	si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
-	si_pm4_cmd_add(pm4, 0x80000000);
-	si_pm4_cmd_add(pm4, 0x80000000);
-	si_pm4_cmd_end(pm4, false);
-}
+struct pipe_screen;
+
+struct pipe_screen *
+ddebug_screen_create(struct pipe_screen *screen);
+
+#endif /* DD_PUBLIC_H_ */
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -0,0 +1,353 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "dd_pipe.h"
+#include "dd_public.h"
+#include "util/u_memory.h"
+#include <stdio.h>
+
+
+static const char *
+dd_screen_get_name(struct pipe_screen *_screen)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Pure pass-through to the wrapped driver screen. */
+   return drv->get_name(drv);
+}
+
+static const char *
+dd_screen_get_vendor(struct pipe_screen *_screen)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Pure pass-through to the wrapped driver screen. */
+   return drv->get_vendor(drv);
+}
+
+static const char *
+dd_screen_get_device_vendor(struct pipe_screen *_screen)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Pure pass-through to the wrapped driver screen. */
+   return drv->get_device_vendor(drv);
+}
+
+static int
+dd_screen_get_param(struct pipe_screen *_screen,
+                    enum pipe_cap param)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* The capability value is reported exactly as the driver returns it. */
+   return drv->get_param(drv, param);
+}
+
+static float
+dd_screen_get_paramf(struct pipe_screen *_screen,
+                     enum pipe_capf param)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* The capability value is reported exactly as the driver returns it. */
+   return drv->get_paramf(drv, param);
+}
+
+static int
+dd_screen_get_shader_param(struct pipe_screen *_screen, unsigned shader,
+                           enum pipe_shader_cap param)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* The capability value is reported exactly as the driver returns it. */
+   return drv->get_shader_param(drv, shader, param);
+}
+
+static uint64_t
+dd_screen_get_timestamp(struct pipe_screen *_screen)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Pure pass-through to the wrapped driver screen. */
+   return drv->get_timestamp(drv);
+}
+
+static struct pipe_context *
+dd_screen_context_create(struct pipe_screen *_screen, void *priv,
+                         unsigned flags)
+{
+   struct dd_screen *dscreen = dd_screen(_screen);
+   struct pipe_screen *drv = dscreen->screen;
+   struct pipe_context *drv_ctx;
+
+   /* The wrapped driver context is always created with the debug flag. */
+   drv_ctx = drv->context_create(drv, priv, flags | PIPE_CONTEXT_DEBUG);
+   return dd_context_create(dscreen, drv_ctx);
+}
+
+static boolean
+dd_screen_is_format_supported(struct pipe_screen *_screen,
+                              enum pipe_format format,
+                              enum pipe_texture_target target,
+                              unsigned sample_count,
+                              unsigned tex_usage)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* The answer comes straight from the wrapped driver screen. */
+   return drv->is_format_supported(drv, format, target, sample_count,
+                                   tex_usage);
+}
+
+static boolean
+dd_screen_can_create_resource(struct pipe_screen *_screen,
+                              const struct pipe_resource *templat)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* The answer comes straight from the wrapped driver screen. */
+   return drv->can_create_resource(drv, templat);
+}
+
+static void
+dd_screen_flush_frontbuffer(struct pipe_screen *_screen,
+                            struct pipe_resource *resource,
+                            unsigned level, unsigned layer,
+                            void *context_private,
+                            struct pipe_box *sub_box)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* All arguments are handed to the wrapped driver screen unchanged. */
+   drv->flush_frontbuffer(drv, resource, level, layer, context_private,
+                          sub_box);
+}
+
+static int
+dd_screen_get_driver_query_info(struct pipe_screen *_screen,
+                                unsigned index,
+                                struct pipe_driver_query_info *info)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* 'info' is filled in by the wrapped driver screen. */
+   return drv->get_driver_query_info(drv, index, info);
+}
+
+static int
+dd_screen_get_driver_query_group_info(struct pipe_screen *_screen,
+                                      unsigned index,
+                                      struct pipe_driver_query_group_info *info)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* 'info' is filled in by the wrapped driver screen. */
+   return drv->get_driver_query_group_info(drv, index, info);
+}
+
+
+/********************************************************************
+ * resource
+ */
+
+static struct pipe_resource *
+dd_screen_resource_create(struct pipe_screen *_screen,
+                          const struct pipe_resource *templat)
+{
+   struct pipe_screen *drv = dd_screen(_screen)->screen;
+   struct pipe_resource *created = drv->resource_create(drv, templat);
+
+   /* On success, point the resource's screen field at the wrapper. */
+   if (created != NULL)
+      created->screen = _screen;
+   return created;
+}
+
+static struct pipe_resource *
+dd_screen_resource_from_handle(struct pipe_screen *_screen,
+                               const struct pipe_resource *templ,
+                               struct winsys_handle *handle)
+{
+   struct pipe_screen *drv = dd_screen(_screen)->screen;
+   struct pipe_resource *imported =
+      drv->resource_from_handle(drv, templ, handle);
+
+   /* On success, point the resource's screen field at the wrapper. */
+   if (imported != NULL)
+      imported->screen = _screen;
+   return imported;
+}
+
+static struct pipe_resource *
+dd_screen_resource_from_user_memory(struct pipe_screen *_screen,
+                                    const struct pipe_resource *templ,
+                                    void *user_memory)
+{
+   struct pipe_screen *drv = dd_screen(_screen)->screen;
+   struct pipe_resource *wrapped =
+      drv->resource_from_user_memory(drv, templ, user_memory);
+
+   /* On success, point the resource's screen field at the wrapper. */
+   if (wrapped != NULL)
+      wrapped->screen = _screen;
+   return wrapped;
+}
+
+static void
+dd_screen_resource_destroy(struct pipe_screen *_screen,
+                           struct pipe_resource *res)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Destruction is delegated to the wrapped driver screen. */
+   drv->resource_destroy(drv, res);
+}
+
+static boolean
+dd_screen_resource_get_handle(struct pipe_screen *_screen,
+                              struct pipe_resource *resource,
+                              struct winsys_handle *handle)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Pure pass-through to the wrapped driver screen. */
+   return drv->resource_get_handle(drv, resource, handle);
+}
+
+
+/********************************************************************
+ * fence
+ */
+
+static void
+dd_screen_fence_reference(struct pipe_screen *_screen,
+                          struct pipe_fence_handle **pdst,
+                          struct pipe_fence_handle *src)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Fence handles are passed to the wrapped driver screen untouched. */
+   drv->fence_reference(drv, pdst, src);
+}
+
+static boolean
+dd_screen_fence_finish(struct pipe_screen *_screen,
+                       struct pipe_fence_handle *fence,
+                       uint64_t timeout)
+{
+   struct pipe_screen *const drv = dd_screen(_screen)->screen;
+   /* Fence handles are passed to the wrapped driver screen untouched. */
+   return drv->fence_finish(drv, fence, timeout);
+}
+
+
+/********************************************************************
+ * screen
+ */
+
+static void
+dd_screen_destroy(struct pipe_screen *_screen)
+{
+   struct dd_screen *wrapper = dd_screen(_screen);
+   struct pipe_screen *drv = wrapper->screen;
+
+   /* Destroy the wrapped screen first, then free the wrapper itself. */
+   drv->destroy(drv);
+   FREE(wrapper);
+}
+
+struct pipe_screen *
+ddebug_screen_create(struct pipe_screen *screen)
+{
+   struct dd_screen *dscreen;
+   const char *option = debug_get_option("GALLIUM_DDEBUG", NULL);
+   bool dump_always = option && !strcmp(option, "always");
+   bool no_flush = option && strstr(option, "noflush");
+   bool help = option && !strcmp(option, "help");
+   unsigned timeout = 0;
+   /* Wraps 'screen' per GALLIUM_DDEBUG; "help" prints usage and exits. */
+   if (help) {
+      puts("Gallium driver debugger");
+      puts("");
+      puts("Usage:");
+      puts("");
+      puts("  GALLIUM_DDEBUG=always");
+      puts("    Dump context and driver information after every draw call into");
+      puts("    $HOME/"DD_DIR"/.");
+      puts("");
+      puts("  GALLIUM_DDEBUG=[timeout in ms] noflush");
+      puts("    Flush and detect a device hang after every draw call based on the given");
+      puts("    fence timeout and dump context and driver information into");
+      puts("    $HOME/"DD_DIR"/ when a hang is detected.");
+      puts("    If 'noflush' is specified, only detect hangs in pipe->flush.");
+      puts("");
+      exit(0);
+   }
+   /* Unset option or no parsable timeout: return the screen unwrapped. */
+   if (!option)
+      return screen;
+   if (!dump_always && sscanf(option, "%u", &timeout) != 1)
+      return screen;
+
+   dscreen = CALLOC_STRUCT(dd_screen);
+   if (!dscreen)
+      return NULL;
+
+#define SCR_INIT(_member) \
+   dscreen->base._member = screen->_member ? dd_screen_##_member : NULL
+   /* Hooks set through SCR_INIT stay NULL when the driver lacks them. */
+   dscreen->base.destroy = dd_screen_destroy;
+   dscreen->base.get_name = dd_screen_get_name;
+   dscreen->base.get_vendor = dd_screen_get_vendor;
+   dscreen->base.get_device_vendor = dd_screen_get_device_vendor;
+   dscreen->base.get_param = dd_screen_get_param;
+   dscreen->base.get_paramf = dd_screen_get_paramf;
+   dscreen->base.get_shader_param = dd_screen_get_shader_param;
+   /* get_video_param */
+   /* get_compute_param */
+   SCR_INIT(get_timestamp);
+   dscreen->base.context_create = dd_screen_context_create;
+   dscreen->base.is_format_supported = dd_screen_is_format_supported;
+   /* is_video_format_supported */
+   SCR_INIT(can_create_resource);
+   dscreen->base.resource_create = dd_screen_resource_create;
+   dscreen->base.resource_from_handle = dd_screen_resource_from_handle;
+   SCR_INIT(resource_from_user_memory);
+   dscreen->base.resource_get_handle = dd_screen_resource_get_handle;
+   dscreen->base.resource_destroy = dd_screen_resource_destroy;
+   SCR_INIT(flush_frontbuffer);
+   SCR_INIT(fence_reference);
+   SCR_INIT(fence_finish);
+   SCR_INIT(get_driver_query_info);
+   SCR_INIT(get_driver_query_group_info);
+
+#undef SCR_INIT
+
+   dscreen->screen = screen;
+   dscreen->timeout_ms = timeout;
+   dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS : DD_DETECT_HANGS;
+   dscreen->no_flush = no_flush;
+
+   switch (dscreen->mode) {
+   case DD_DUMP_ALL_CALLS:
+      fprintf(stderr, "Gallium debugger active. Logging all calls.\n");
+      break;
+   case DD_DETECT_HANGS:
+      fprintf(stderr, "Gallium debugger active. "
+              "The hang detection timeout is %u ms.\n", timeout);
+      break;
+   default:
+      assert(0);
+   }
+
+   return &dscreen->base;
+}
--- a/src/gallium/drivers/ddebug/dd_util.h
+++ b/src/gallium/drivers/ddebug/dd_util.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2008 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DD_UTIL_H
+#define DD_UTIL_H
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "os/os_process.h"
+#include "util/u_debug.h"
+
+/* name of the directory in home */
+#define DD_DIR "ddebug_dumps"
+
+static inline FILE *
+dd_get_debug_file(void)
+{
+   static unsigned index; /* NOTE(review): one counter per including .c file */
+   char proc_name[128], dir[256], name[512];
+   FILE *f;
+
+   if (!os_get_process_name(proc_name, sizeof(proc_name))) {
+      fprintf(stderr, "dd: can't get the process name\n");
+      return NULL;
+   }
+   /* Dump directory is <HOME>/DD_DIR; "." is passed as the HOME fallback. */
+   snprintf(dir, sizeof(dir), "%s/"DD_DIR, debug_get_option("HOME", "."));
+   /* A directory that already exists is not an error. */
+   if (mkdir(dir, 0774) && errno != EEXIST) {
+      fprintf(stderr, "dd: can't create a directory (%i)\n", errno);
+      return NULL;
+   }
+   /* File name is <process>_<pid>_<index>; returns NULL on any failure. */
+   snprintf(name, sizeof(name), "%s/%s_%u_%08u", dir, proc_name, (unsigned)getpid(), index++);
+   f = fopen(name, "w");
+   if (!f) {
+      fprintf(stderr, "dd: can't open file %s\n", name);
+      return NULL;
+   }
+
+   return f;
+}
+
+#endif /* DD_UTIL_H */
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -86,7 +86,7 @@ static const uint8_t a20x_primtypes[PIPE_PRIM_MAX] = {
 };

 struct pipe_context *
-fd2_context_create(struct pipe_screen *pscreen, void *priv)
+fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct fd2_context *fd2_ctx = CALLOC_STRUCT(fd2_context);
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.h
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.h
@@ -47,6 +47,6 @@ fd2_context(struct fd_context *ctx)
 }

 struct pipe_context *
-fd2_context_create(struct pipe_screen *pscreen, void *priv);
+fd2_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);

 #endif /* FD2_CONTEXT_H_ */
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -280,6 +280,8 @@ enum a3xx_rb_blend_opcode {
 enum a3xx_intp_mode {
 	SMOOTH = 0,
 	FLAT = 1,
+	ZERO = 2,
+	ONE = 3,
 };

 enum a3xx_repl_mode {
@@ -680,9 +682,16 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
 #define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE		0x00080000
 #define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE			0x00100000
 #define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE		0x00200000
+#define A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z			0x00400000
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCOORD				0x00800000
 #define A3XX_GRAS_CL_CLIP_CNTL_WCOORD				0x01000000
 #define A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE			0x02000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK	0x1c000000
+#define A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT	26
+static inline uint32_t A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__SHIFT) & A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES__MASK;
+}

 #define REG_A3XX_GRAS_CL_GB_CLIP_ADJ				0x00002044
 #define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK			0x000003ff
@@ -773,7 +782,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
 #define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT		0
 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
 {
-	return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+	return ((((int32_t)(val * 1048576.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
 }

 #define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET			0x0000206d
@@ -894,6 +903,9 @@ static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val)
 #define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE		0x00010000

 #define REG_A3XX_RB_RENDER_CONTROL				0x000020c1
+#define A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE		0x00000001
+#define A3XX_RB_RENDER_CONTROL_YUV_IN_ENABLE			0x00000002
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_INPUT_ENABLE		0x00000004
 #define A3XX_RB_RENDER_CONTROL_FACENESS				0x00000008
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK			0x00000ff0
 #define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT			4
@@ -907,6 +919,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
 #define A3XX_RB_RENDER_CONTROL_YCOORD				0x00008000
 #define A3XX_RB_RENDER_CONTROL_ZCOORD				0x00010000
 #define A3XX_RB_RENDER_CONTROL_WCOORD				0x00020000
+#define A3XX_RB_RENDER_CONTROL_I_CLAMP_ENABLE			0x00080000
+#define A3XX_RB_RENDER_CONTROL_COV_VALUE_OUTPUT_ENABLE		0x00100000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST			0x00400000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK		0x07000000
 #define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT		24
@@ -914,6 +928,8 @@ static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compar
 {
 	return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
 }
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_COVERAGE		0x40000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TO_ONE			0x80000000

 #define REG_A3XX_RB_MSAA_CONTROL				0x000020c2
 #define A3XX_RB_MSAA_CONTROL_DISABLE				0x00000400
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
@@ -28,6 +28,7 @@

 #include "pipe/p_state.h"
 #include "util/u_blend.h"
+#include "util/u_dual_blend.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"

@@ -131,5 +132,8 @@ fd3_blend_state_create(struct pipe_context *pctx,
 			so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
 	}

+	if (cso->rt[0].blend_enable && util_blend_state_is_dual(cso, 0))
+		so->rb_render_control = A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE;
+
 	return so;
 }
--- a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
@@ -36,6 +36,7 @@

 struct fd3_blend_stateobj {
 	struct pipe_blend_state base;
+	uint32_t rb_render_control;
 	struct {
 		/* Blend control bits for color if there is an alpha channel */
 		uint32_t blend_control_rgb;
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -98,7 +98,7 @@ static const uint8_t primtypes[PIPE_PRIM_MAX] = {
 };

 struct pipe_context *
-fd3_context_create(struct pipe_screen *pscreen, void *priv)
+fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
 	struct fd_screen *screen = fd_screen(pscreen);
 	struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -73,22 +73,6 @@ struct fd3_context {
 	 */
 	struct fd_vertex_state blit_vbuf_state;

-
-	/*
-	 * Border color layout *appears* to be as arrays of 0x40 byte
-	 * elements, with frag shader elements starting at (16 x 0x40).
-	 * But at some point I should probably experiment more with
-	 * samplers in vertex shaders to be sure.  Unclear about why
-	 * there is this offset when there are separate VS and FS base
-	 * addr regs.
-	 *
-	 * The first 8 bytes of each entry are the requested border
-	 * color in fp16.  Unclear about the rest.. could be used for
-	 * other formats, or could simply be for aligning the pitch
-	 * to 32 pixels.
-	 */
-#define BORDERCOLOR_SIZE 0x40
-
 	struct u_upload_mgr *border_color_uploader;
 	struct pipe_resource *border_color_buf;

@@ -119,6 +103,6 @@ fd3_context(struct fd_context *ctx)
 }

 struct pipe_context *
-fd3_context_create(struct pipe_screen *pscreen, void *priv);
+fd3_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags);

 #endif /* FD3_CONTEXT_H_ */
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -149,6 +149,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			&fd3_ctx->border_color_buf,
 			&ptr);

+	fd_setup_border_colors(tex, ptr, tex_off[sb]);
+
 	if (tex->num_samplers > 0) {
 		/* output sampler state: */
 		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
@@ -163,57 +165,6 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
 			const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
 					fd3_sampler_stateobj(tex->samplers[i]) :
 					&dummy_sampler;
-			uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
-					(BORDERCOLOR_SIZE * tex_off[sb]) +
-					(BORDERCOLOR_SIZE * i));
-			uint32_t *bcolor32 = (uint32_t *)&bcolor[16];
-
-			/*
-			 * XXX HACK ALERT XXX
-			 *
-			 * The border colors need to be swizzled in a particular
-			 * format-dependent order. Even though samplers don't know about
-			 * formats, we can assume that with a GL state tracker, there's a
-			 * 1:1 correspondence between sampler and texture. Take advantage
-			 * of that knowledge.
-			 */
-			if (i < tex->num_textures && tex->textures[i]) {
-				const struct util_format_description *desc =
-					util_format_description(tex->textures[i]->format);
-				for (j = 0; j < 4; j++) {
-					if (desc->swizzle[j] >= 4)
-						continue;
-
-					const struct util_format_channel_description *chan =
-						&desc->channel[desc->swizzle[j]];
-					int size = chan->size;
-
-					/* The Z16 texture format we use seems to look in the
-					 * 32-bit border color slots
-					 */
-					if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS)
-						size = 32;
-
-					/* Formats like R11G11B10 or RGB9_E5 don't specify
-					 * per-channel sizes properly.
-					 */
-					if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
-						size = 16;
-
-					if (chan->pure_integer && size > 16)
-						bcolor32[desc->swizzle[j] + 4] =
-							sampler->base.border_color.i[j];
-					else if (size > 16)
-						bcolor32[desc->swizzle[j]] =
-							fui(sampler->base.border_color.f[j]);
-					else if (chan->pure_integer)
-						bcolor[desc->swizzle[j] + 8] =
-							sampler->base.border_color.i[j];
-					else
-						bcolor[desc->swizzle[j]] =
-							util_float_to_half(sampler->base.border_color.f[j]);
-				}
-			}

 			OUT_RING(ring, sampler->texsamp0);
 			OUT_RING(ring, sampler->texsamp1);
@@ -400,15 +351,27 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 	unsigned vtxcnt_regid = regid(63, 0);

 	for (i = 0; i < vp->inputs_count; i++) {
-		uint8_t semantic = sem2name(vp->inputs[i].semantic);
-		if (semantic == TGSI_SEMANTIC_VERTEXID_NOBASE)
-			vertex_regid = vp->inputs[i].regid;
-		else if (semantic == TGSI_SEMANTIC_INSTANCEID)
-			instance_regid = vp->inputs[i].regid;
-		else if (semantic == IR3_SEMANTIC_VTXCNT)
-			vtxcnt_regid = vp->inputs[i].regid;
-		else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask)
+		if (vp->inputs[i].sysval) {
+			switch(vp->inputs[i].slot) {
+			case SYSTEM_VALUE_BASE_VERTEX:
+				/* handled elsewhere */
+				break;
+			case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+				vertex_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_INSTANCE_ID:
+				instance_regid = vp->inputs[i].regid;
+				break;
+			case SYSTEM_VALUE_VERTEX_CNT:
+				vtxcnt_regid = vp->inputs[i].regid;
+				break;
+			default:
+				unreachable("invalid system value");
+				break;
+			}
+		} else if (i < vtx->vtx->num_elements && vp->inputs[i].compmask) {
 			last = i;
+		}
 	}

 	/* hw doesn't like to be configured for zero vbo's, it seems: */
@@ -419,7 +382,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 		return;

 	for (i = 0, j = 0; i <= last; i++) {
-		assert(sem2name(vp->inputs[i].semantic) == 0);
+		assert(!vp->inputs[i].sysval);
 		if (vp->inputs[i].compmask) {
 			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
 			const struct pipe_vertex_buffer *vb =
@@ -492,8 +455,10 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
 	}

-	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
-		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
+	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG | FD_DIRTY_BLEND_DUAL)) &&
+		!emit->key.binning_pass) {
+		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
+			fd3_blend_stateobj(ctx->blend)->rb_render_control;

 		val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
 		val |= COND(fp->frag_coord, A3XX_RB_RENDER_CONTROL_XCOORD |
@@ -563,10 +528,30 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
+		/* TODO only use if prog doesn't use clipvertex/clipdist */
+		val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+				MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}

+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+		uint32_t planes = ctx->rasterizer->clip_plane_enable;
+		int count = 0;
+
+		while (planes && count < 6) {
+			int i = ffs(planes) - 1;
+
+			planes &= ~(1U << i);
+			fd_wfi(ctx, ring);
+			OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
+		}
+	}
+
 	/* NOTE: since primitive_restart is not actually part of any
 	 * state object, we need to make sure that we always emit
 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
@@ -620,9 +605,13 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
 	}

-	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
 		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
-		fd3_program_emit(ring, emit, pfb->nr_cbufs, pfb->cbufs);
+		int nr_cbufs = pfb->nr_cbufs;
+		if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
+			A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
+			nr_cbufs++;
+		fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
 	}

 	/* TODO we should not need this or fd_wfi() before emit_constants():
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
 	case PIPE_FORMAT_R16G16_FLOAT:
 	case PIPE_FORMAT_R11G11B10_FLOAT:
 		return RB_R16G16B16A16_FLOAT;
+	case PIPE_FORMAT_L8_UNORM:
+		return RB_R8G8B8A8_UNORM;
 	default:
 		return fd3_pipe2color(format);
 	}
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -194,24 +194,17 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 	/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
 	constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;

-	pos_regid = ir3_find_output_regid(vp,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	posz_regid = ir3_find_output_regid(fp,
-		ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-	psize_regid = ir3_find_output_regid(vp,
-		ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+	pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
+	posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
+	psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
 	if (fp->color0_mrt) {
 		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
-			ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+			ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
 	} else {
-		for (i = 0; i < fp->outputs_count; i++) {
-			ir3_semantic sem = fp->outputs[i].semantic;
-			unsigned idx = sem2idx(sem);
-			if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
-				continue;
-			debug_assert(idx < ARRAY_SIZE(color_regid));
-			color_regid[idx] = fp->outputs[i].regid;
-		}
+		color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
+		color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
+		color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
+		color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
 	}

 	/* adjust regids for alpha output formats. there is no alpha render
@@ -280,14 +273,14 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,

 		j = ir3_next_varying(fp, j);
 		if (j < fp->inputs_count) {
-			k = ir3_find_output(vp, fp->inputs[j].semantic);
+			k = ir3_find_output(vp, fp->inputs[j].slot);
 			reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
 			reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
 		}

 		j = ir3_next_varying(fp, j);
 		if (j < fp->inputs_count) {
-			k = ir3_find_output(vp, fp->inputs[j].semantic);
+			k = ir3_find_output(vp, fp->inputs[j].slot);
 			reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
 			reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
 		}
@@ -394,7 +387,6 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,

 		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
 		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
-			uint32_t interp = fp->inputs[j].interpolate;

 			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
 			 * instead.. rather than -8 everywhere else..
@@ -406,8 +398,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 			 */
 			debug_assert((inloc % 4) == 0);

-			if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
-					((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+			if ((fp->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
+					(fp->inputs[j].rasterflat && emit->rasterflat)) {
 				uint32_t loc = inloc;
 				for (i = 0; i < 4; i++, loc++) {
 					vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
@@ -415,14 +407,20 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
 				}
 			}

-			/* Replace the .xy coordinates with S/T from the point sprite. Set
-			 * interpolation bits for .zw such that they become .01
-			 */
-			if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
-				vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
-					<< ((inloc % 16) * 2);
-				vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
-				vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+			gl_varying_slot slot = fp->inputs[j].slot;
+
+			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+			if (slot >= VARYING_SLOT_VAR0) {
+				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+				/* Replace the .xy coordinates with S/T from the point sprite. Set
+				 * interpolation bits for .zw such that they become .01
+				 */
+				if (emit->sprite_coord_enable & texmask) {
+					vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
+							<< ((inloc % 16) * 2);
+					vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
+					vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
+				}
 			}
 		}

--- a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
@@ -65,7 +65,8 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
 	if (cso->multisample)
 		TODO
 */
-	so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
+	so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER /* ??? */ |
+		COND(cso->clip_halfz, A3XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z);
 	so->gras_su_point_minmax =
 			A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min) |
 			A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max);
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno.xml               (    364 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2015-05-20 20:03:07)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2015-05-20 20:03:14)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10755 bytes, from 2015-09-14 20:46:55)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67771 bytes, from 2015-09-14 20:46:55)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63970 bytes, from 2015-09-14 20:50:12)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -162,10 +162,13 @@ enum a4xx_tex_fmt {
 	TFMT4_8_UNORM = 4,
 	TFMT4_8_8_UNORM = 14,
 	TFMT4_8_8_8_8_UNORM = 28,
+	TFMT4_8_SNORM = 5,
 	TFMT4_8_8_SNORM = 15,
 	TFMT4_8_8_8_8_SNORM = 29,
+	TFMT4_8_UINT = 6,
 	TFMT4_8_8_UINT = 16,
 	TFMT4_8_8_8_8_UINT = 30,
+	TFMT4_8_SINT = 7,
 	TFMT4_8_8_SINT = 17,
 	TFMT4_8_8_8_8_SINT = 31,
 	TFMT4_16_UINT = 21,
@@ -246,7 +249,8 @@ enum a4xx_tex_clamp {
 	A4XX_TEX_REPEAT = 0,
 	A4XX_TEX_CLAMP_TO_EDGE = 1,
 	A4XX_TEX_MIRROR_REPEAT = 2,
-	A4XX_TEX_CLAMP_NONE = 3,
+	A4XX_TEX_CLAMP_TO_BORDER = 3,
+	A4XX_TEX_MIRROR_CLAMP = 4,
 };

 enum a4xx_tex_aniso {
--- a/Show More
+++ b/Show More