docs: Add sha256 sums for the 10.1.6 release files

Just after generating these files and tagging the release.
docs: Add release notes for the 10.1.6 release.
2014-06-24 21:24:53 -07:00 · 2014-06-24 21:17:37 -07:00 · 2014-06-24 21:14:06 -07:00 · 2014-06-24 12:52:21 -07:00 · 2014-06-24 12:48:06 -07:00 · 2014-06-23 16:50:38 -07:00
257 changed files with 14330 additions and 3464 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-10.1.0-rc2
+10.1.6
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,28 @@
+# This patch does not apply cleanly, author says it can be skipped.
+dff3eccd158d648482bb47118ef5d57a9186e5a4
+
+# And this one depends on the above, author says it too can be skipped.
+ac35ded4733883037316d556af596524e5e02535
+
+# This patch introduces some regressions. See:
+# https://bugs.freedesktop.org/show_bug.cgi?id=77443
+1afe3359258a9e89b62c8638761f52d78f6d1cbc
+
+# Author retracted this from consideration for stable branch
+3e817e7e56806d8adb8f16c35136045c29908944
+
+# And this one was simply a bug fix for the previously-retracted commit
+2bab95973d8ad3a84f62670143d6f26c230d9582
+
+# Here we have a commit, and its subsequent "revert" both proposed within a
+# single window of the stable release. So we can achieve the same final effect
+# by ignoring both of the commits.
+e3cc0d90e14e62a0a787b6c07a6df0f5c84039be
+0d5ec2c615784929be095951f9269773a790a2dd
+
+# The function being modified here (_eglCreateWindowSurfaceCommon) does not
+# exist in the 10.1 branch.
+91ff0d4c6510dc38f279c586ced17fba917873e7
+
+# This patch is not needed (modifies work only in 10.2)
+6980cae6aeb6671b6b0245e20a2d34957c1fff0a
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*10\.1.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -542,11 +542,20 @@ AC_ARG_ENABLE([dri],
        [enable DRI modules @<:@default=enabled@:>@])],
    [enable_dri="$enableval"],
    [enable_dri=yes])
+
+case "$host_os" in
+linux*)
+    dri3_default=yes
+    ;;
+*)
+    dri3_default=no
+    ;;
+esac
 AC_ARG_ENABLE([dri3],
    [AS_HELP_STRING([--enable-dri3],
-        [enable DRI3 @<:@default=enabled@:>@])],
+        [enable DRI3 @<:@default=auto@:>@])],
    [enable_dri3="$enableval"],
-    [enable_dri3=yes])
+    [enable_dri3="$dri3_default"])
 AC_ARG_ENABLE([glx],
    [AS_HELP_STRING([--enable-glx],
        [enable GLX library @<:@default=enabled@:>@])],
@@ -771,6 +780,13 @@ if test "x$have_libdrm" = xyes; then
 	DEFINES="$DEFINES -DHAVE_LIBDRM"
 fi

+case "$host_os" in
+linux*)
+    need_libudev=yes ;;
+*)
+    need_libudev=no ;;
+esac
+
 PKG_CHECK_MODULES([LIBUDEV], [libudev >= $LIBUDEV_REQUIRED],
                  have_libudev=yes, have_libudev=no)

@@ -830,9 +846,6 @@ xyesno)
        PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED])
        GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV libdrm >= $LIBDRM_REQUIRED"
        if test x"$enable_dri3" = xyes; then
-            if test x"$have_libudev" != xyes; then
-              AC_MSG_ERROR([DRI3 requires libudev >= $LIBUDEV_REQUIRED])
-            fi
            PKG_CHECK_MODULES([DRI3PROTO], [dri3proto >= $DRI3PROTO_REQUIRED])
            PKG_CHECK_MODULES([PRESENTPROTO], [presentproto >= $PRESENTPROTO_REQUIRED])
        fi
@@ -1017,7 +1030,7 @@ if test "x$enable_dri" = xyes; then
    gnu*)
        DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1"
        DEFINES="$DEFINES -DHAVE_ALIAS"
-	;;
+        ;;
    solaris*)
        DEFINES="$DEFINES -DUSE_EXTERNAL_DXTN_LIB=1"
        ;;
@@ -1037,7 +1050,7 @@ if test "x$enable_dri" = xyes; then
    DRI_DIRS=`echo "$DRI_DIRS" | $SED 's/  */ /g'`

    # Check for expat
-    PKG_CHECK_EXISTS([EXPAT], [have_expat=yes], [have_expat=no])
+    PKG_CHECK_EXISTS([expat], [have_expat=yes], [have_expat=no])
    if test "x$have_expat" = "xyes"; then
       PKG_CHECK_MODULES([EXPAT], [expat], [],
                         AC_MSG_ERROR([Expat required for DRI.]))
@@ -1178,8 +1191,8 @@ if test "x$enable_gbm" = xauto; then
    esac
 fi
 if test "x$enable_gbm" = xyes; then
-    if test x"$have_libudev" != xyes; then
-        AC_MSG_ERROR([gbm needs udev])
+    if test "x$need_libudev$have_libudev" = xyesno; then
+        AC_MSG_ERROR([gbm requires udev >= $LIBUDEV_REQUIRED])
    fi

    if test "x$enable_dri" = xyes; then
@@ -1187,10 +1200,21 @@ if test "x$enable_gbm" = xyes; then
        if test "x$enable_shared_glapi" = xno; then
            AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi])
        fi
+    else
+        # Strictly speaking libgbm does not require --enable-dri, although
+        # both of its backends do. Thus one can build libgbm without any
+        # backends if --disable-dri is set.
+        # To avoid unnecessary complexity of checking if at least one backend
+        # is available when building, just mandate --enable-dri.
+        AC_MSG_ERROR([gbm requires --enable-dri])
    fi
 fi
 AM_CONDITIONAL(HAVE_GBM, test "x$enable_gbm" = xyes)
-GBM_PC_REQ_PRIV="libudev"
+if test "x$need_libudev" = xyes; then
+    GBM_PC_REQ_PRIV="libudev >= $LIBUDEV_REQUIRED"
+else
+    GBM_PC_REQ_PRIV=""
+fi
 GBM_PC_LIB_PRIV="$DLOPEN_LIBS"
 AC_SUBST([GBM_PC_REQ_PRIV])
 AC_SUBST([GBM_PC_LIB_PRIV])
@@ -1461,9 +1485,9 @@ for plat in $egl_platforms; do
 		;;
 	esac

-        case "$plat$have_libudev" in
-                waylandno|drmno)
-                    AC_MSG_ERROR([cannot build $plat platfrom without udev]) ;;
+        case "$plat$need_libudev$have_libudev" in
+                waylandyesno|drmyesno)
+                    AC_MSG_ERROR([cannot build $plat platform without udev >= $LIBUDEV_REQUIRED]) ;;
        esac
 done

@@ -1529,11 +1553,11 @@ AC_ARG_ENABLE([gallium-llvm],
    [enable_gallium_llvm="$enableval"],
    [enable_gallium_llvm=auto])

-AC_ARG_WITH([llvm-shared-libs],
-    [AS_HELP_STRING([--with-llvm-shared-libs],
-        [link with LLVM shared libraries @<:@default=disabled@:>@])],
+AC_ARG_ENABLE([llvm-shared-libs],
+    [AS_HELP_STRING([--enable-llvm-shared-libs],
+        [link with LLVM shared libraries @<:@default=enabled@:>@])],
    [],
-    [with_llvm_shared_libs=no])
+    [with_llvm_shared_libs=yes])

 AC_ARG_WITH([llvm-prefix],
    [AS_HELP_STRING([--with-llvm-prefix],
@@ -1588,6 +1612,12 @@ if test "x$enable_gallium_llvm" = xyes; then
        AC_COMPUTE_INT([LLVM_VERSION_MINOR], [LLVM_VERSION_MINOR],
            [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])

+        dnl In LLVM 3.4.1 patch level was defined in config.h and not
+        dnl llvm-config.h
+        AC_COMPUTE_INT([LLVM_VERSION_PATCH], [LLVM_VERSION_PATCH],
+            [#include "${LLVM_INCLUDEDIR}/llvm/Config/config.h"],
+            LLVM_VERSION_PATCH=0) dnl Default if LLVM_VERSION_PATCH not found
+
        if test "x${LLVM_VERSION_MAJOR}" != x; then
            LLVM_VERSION_INT="${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}"
        else
@@ -1610,7 +1640,7 @@ if test "x$enable_gallium_llvm" = xyes; then
                LLVM_COMPONENTS="${LLVM_COMPONENTS} option"
            fi
        fi
-        DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT"
+        DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
        MESA_LLVM=1

        dnl Check for Clang internal headers
@@ -1629,6 +1659,10 @@ if test "x$enable_gallium_llvm" = xyes; then
 else
    MESA_LLVM=0
    LLVM_VERSION_INT=0
+
+    if test "x$enable_opencl" = xyes; then
+        AC_MSG_ERROR([cannot enable OpenCL without LLVM])
+    fi
 fi

 dnl Directory for XVMC libs
@@ -1702,8 +1736,9 @@ gallium_require_llvm() {

 gallium_require_drm_loader() {
    if test "x$enable_gallium_loader" = xyes; then
-        PKG_CHECK_MODULES([LIBUDEV], [libudev], [],
-                          AC_MSG_ERROR([Gallium drm loader requires libudev]))
+        if test "x$need_libudev$have_libudev" = xyesno; then
+            AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED])
+        fi
        if test "x$have_libdrm" != xyes; then
            AC_MSG_ERROR([Gallium drm loader requires libdrm >= $LIBDRM_REQUIRED])
        fi
--- a/docs/relnotes/10.1.1.html
+++ b/docs/relnotes/10.1.1.html
@@ -0,0 +1,254 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.1.1 Release Notes / (April 18, 2014)</h1>
+
+<p>
+Mesa 10.1.1 is a bug fix release which fixes bugs found since the 10.1 release.
+</p>
+<p>
+Mesa 10.1.1 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+96e63674ccfa98e7ec6eb4fee3f770c3  MesaLib-10.1.1.tar.gz
+1fde7ed079df7aeb9b6a744ca033de8d  MesaLib-10.1.1.tar.bz2
+e64d0a562638664b13d2edf22321df59  MesaLib-10.1.1.zip
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71547">Bug 71547</a> - compilation failure :#error &quot;SSE4.1 instruction set not enabled&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74868">Bug 74868</a> - r600g: Diablo III Crashes After a few minutes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74988">Bug 74988</a> - Buffer overrun (segfault) decompressing ETC2 texture in GLBenchmark 3.0 Manhattan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75279">Bug 75279</a> - XCloseDisplay() takes one minute around nouveau_dri.so, freezing Firefox startup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75543">Bug 75543</a> - OSMesa Gallium OSMesaMakeCurrent</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75660">Bug 75660</a> - u_inlines.h:277:pipe_buffer_map_range: Assertion `length' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76323">Bug 76323</a> - GLSL compiler ignores layout(binding=N) on uniform blocks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76377">Bug 76377</a> - DRI3 should only be enabled on Linux due to a udev dependency</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76749">Bug 76749</a> - [HSW] DOTA world lighting has no effect</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77102">Bug 77102</a> - gallium nouveau has no profile in vdpau and libva</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77207">Bug 77207</a> - [ivb/hsw] batch overwritten with garbage</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Aaron Watry (1):</p>
+<ul>
+  <li>gallium/util: Fix memory leak</li>
+</ul>
+
+<p>Alexander von Gluck IV (1):</p>
+<ul>
+  <li>haiku: Fix build through scons corrections and viewport fixes</li>
+</ul>
+
+<p>Anuj Phogat (2):</p>
+<ul>
+  <li>mesa: Set initial internal format of a texture to GL_RGBA</li>
+  <li>mesa: Allow GL_DEPTH_COMPONENT and GL_DEPTH_STENCIL combinations in glTexImage{123}D()</li>
+</ul>
+
+<p>Brian Paul (12):</p>
+<ul>
+  <li>softpipe: use 64-bit arithmetic in softpipe_resource_layout()</li>
+  <li>mesa: don't call ctx-&gt;Driver.ClearBufferSubData() if size==0</li>
+  <li>st/osmesa: check buffer size when searching for buffers</li>
+  <li>mesa: fix copy &amp; paste bugs in pack_ubyte_SARGB8()</li>
+  <li>mesa: fix copy &amp; paste bugs in pack_ubyte_SRGB8()</li>
+  <li>c11/threads: don't include assert.h if the assert macro is already defined</li>
+  <li>mesa: fix unpack_Z32_FLOAT_X24S8() / unpack_Z32_FLOAT() mix-up</li>
+  <li>st/mesa: add null pointer checking in query object functions</li>
+  <li>mesa: fix glMultiDrawArrays inside a display list</li>
+  <li>cso: fix sampler view count in cso_set_sampler_views()</li>
+  <li>svga: replace sampler assertion with conditional</li>
+  <li>svga: move LIST_INITHEAD(dirty_buffers) earlier in svga_context_create()</li>
+</ul>
+
+<p>Carl Worth (3):</p>
+<ul>
+  <li>cherry-ignore: Ignore a few patches</li>
+  <li>glsl: Allow explicit binding on atomics again</li>
+  <li>Update VERSION to 10.1.1</li>
+</ul>
+
+<p>Chia-I Wu (1):</p>
+<ul>
+  <li>i965/vec4: fix record clearing in copy propagation</li>
+</ul>
+
+<p>Christian König (2):</p>
+<ul>
+  <li>st/mesa: recreate sampler view on context change v3</li>
+  <li>st/mesa: fix sampler view handling with shared textures v4</li>
+</ul>
+
+<p>Courtney Goeltzenleuchter (1):</p>
+<ul>
+  <li>mesa: add bounds checking to eliminate buffer overrun</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>nv50: add missing brackets when handling the samplers array</li>
+  <li>mesa: return v.value_int64 when the requested type is TYPE_INT64</li>
+  <li>configure: enable dri3 only for linux</li>
+  <li>glx: drop obsolete _XUnlock_Mutex in __glXInitialize error path</li>
+  <li>configure: cleanup libudev handling</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>i965: Fix buffer overruns in MSAA MCS buffer clearing.</li>
+</ul>
+
+<p>Hans (2):</p>
+<ul>
+  <li>util: don't define isfinite(), isnan() for MSVC &gt;= 1800</li>
+  <li>mesa: don't define c99 math functions for MSVC &gt;= 1800</li>
+</ul>
+
+<p>Ian Romanick (7):</p>
+<ul>
+  <li>linker: Split set_uniform_binding into separate functions for blocks and samplers</li>
+  <li>linker: Various trivial clean-ups in set_sampler_binding</li>
+  <li>linker: Fold set_uniform_binding into call site</li>
+  <li>linker: Clean up "unused parameter" warnings</li>
+  <li>linker: Set block bindings based on UniformBlocks rather than UniformStorage</li>
+  <li>linker: Set binding for all elements of UBO array</li>
+  <li>glsl: Propagate explicit binding information from the AST all the way to the linker</li>
+</ul>
+
+<p>Ilia Mirkin (8):</p>
+<ul>
+  <li>nouveau: fix fence waiting logic in screen destroy</li>
+  <li>nv50: adjust blit_3d handling of ms output textures</li>
+  <li>loader: add special logic to distinguish nouveau from nouveau_vieux</li>
+  <li>mesa/main: condition GL_DEPTH_STENCIL on ARB_depth_texture</li>
+  <li>nouveau: add forgotten GL_COMPRESSED_INTENSITY to texture format list</li>
+  <li>nouveau: there may not have been a texture if the fbo was incomplete</li>
+  <li>nvc0/ir: move sample id to second source arg to fix sampler2DMS</li>
+  <li>nouveau: fix firmware check on nvd7/nvd9</li>
+</ul>
+
+<p>Johannes Nixdorf (1):</p>
+<ul>
+  <li>configure.ac: fix the detection of expat with pkg-config</li>
+</ul>
+
+<p>Jonathan Gray (7):</p>
+<ul>
+  <li>gallium: add endian detection for OpenBSD</li>
+  <li>loader: use 0 instead of FALSE which isn't defined</li>
+  <li>loader: don't limit the non-udev path to only android</li>
+  <li>megadriver_stub.c: don't use _GNU_SOURCE to gate the compat code</li>
+  <li>egl/dri2: don't require libudev to build drm/wayland platforms</li>
+  <li>egl/dri2: use drm macros to construct device name</li>
+  <li>configure: don't require libudev for gbm or egl drm/wayland</li>
+</ul>
+
+<p>José Fonseca (4):</p>
+<ul>
+  <li>c11/threads: Fix nano to milisecond conversion.</li>
+  <li>mapi/u_thread: Use GetCurrentThreadId</li>
+  <li>c11/threads: Don't implement thrd_current on Windows.</li>
+  <li>draw: Duplicate TGSI tokens in draw_pipe_pstipple module.</li>
+</ul>
+
+<p>Kenneth Graunke (4):</p>
+<ul>
+  <li>i965/fs: Fix register comparisons in saturate propagation.</li>
+  <li>glsl: Fix lack of i2u in lower_ubo_reference.</li>
+  <li>i965: Stop advertising GL_MESA_ycbcr_texture.</li>
+  <li>glsl: Try vectorizing when seeing a repeated assignment to a channel.</li>
+</ul>
+
+<p>Marek Olšák (13):</p>
+<ul>
+  <li>r600g: fix texelFetchOffset GLSL functions</li>
+  <li>r600g: fix blitting the last 2 mipmap levels for Evergreen</li>
+  <li>mesa: fix the format of glEdgeFlagPointer</li>
+  <li>r600g,radeonsi: fix MAX_TEXTURE_3D_LEVELS and MAX_TEXTURE_ARRAY_LAYERS limits</li>
+  <li>st/mesa: fix per-vertex edge flags and GLSL support (v2)</li>
+  <li>mesa: mark GL_RGB9_E5 as not color-renderable</li>
+  <li>mesa: fix texture border handling for cube arrays</li>
+  <li>mesa: allow generating mipmaps for cube arrays</li>
+  <li>mesa: fix software fallback for generating mipmaps for cube arrays</li>
+  <li>mesa: fix software fallback for generating mipmaps for 3D textures</li>
+  <li>st/mesa: fix generating mipmaps for cube arrays</li>
+  <li>st/mesa: drop the lowering of quad strips to triangle strips</li>
+  <li>r600g: implement edge flags</li>
+</ul>
+
+<p>Matt Turner (4):</p>
+<ul>
+  <li>mesa: Wrap SSE4.1 code in #ifdef __SSE4_1__.</li>
+  <li>i965/fs: Fix off-by-one in saturate propagation.</li>
+  <li>i965/fs: Don't propagate saturate modifiers into partial writes.</li>
+  <li>i965/fs: Don't propagate saturation modifiers if there are source modifiers.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>r600g: Don't leak bytecode on shader compile failure</li>
+</ul>
+
+<p>Mike Stroyan (1):</p>
+<ul>
+  <li>i965: Avoid dependency hints on math opcodes</li>
+</ul>
+
+<p>Thomas Hellstrom (5):</p>
+<ul>
+  <li>winsys/svga: Replace the query mm buffer pool with a slab pool v3</li>
+  <li>winsys/svga: Update the vmwgfx_drm.h header to latest version from kernel</li>
+  <li>winsys/svga: Fix prime surface references also for guest-backed surfaces</li>
+  <li>st/xa: Bind destination before setting new state</li>
+  <li>st/xa: Make sure unused samplers are set to NULL</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>configure: Use LLVM shared libraries by default</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.1.2.html
+++ b/docs/relnotes/10.1.2.html
@@ -0,0 +1,179 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.1.2 Release Notes / (May 5, 2014)</h1>
+
+<p>
+Mesa 10.1.2 is a bug fix release which fixes bugs found since the 10.1.1 release.
+</p>
+<p>
+Mesa 10.1.2 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+37d79f94b1f41852a89d1fc3900bea76  MesaLib-10.1.2.tar.gz
+28b60d15ac9f364da1e0155911eaf44e  MesaLib-10.1.2.tar.bz2
+05300039085a65fc53c5472c4bb5747a  MesaLib-10.1.2.zip
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=27499">Bug 27499</a> - [855GM i915] GL_LINE_STIPPLE displays incorrect colors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75723">Bug 75723</a> - (regression since Linux 3.14?) brw_get_graphics_reset_status: Assertion `brw-&gt;hw_ctx != ((void *)0)' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76894">Bug 76894</a> - Piglit/spec/EXT_framebuffer_object/fbo-bind-renderbuffer failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77702">Bug 77702</a> - [i965 Bisected]Piglit spec/NV_conditional_render_blitframebuffer fails</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Ander Conselvan de Oliveira (2):</p>
+<ul>
+  <li>gbm/dri: Fix out-of-memory error path in dri_device_create()</li>
+  <li>egl: Protect use of gbm_dri with ifdef HAVE_DRM_PLATFORM</li>
+</ul>
+
+<p>Anuj Phogat (27):</p>
+<ul>
+  <li>mesa: Fix glGetVertexAttribi(GL_VERTEX_ATTRIB_ARRAY_SIZE)</li>
+  <li>swrast: Add glBlitFramebuffer to commands affected by conditional rendering</li>
+  <li>mesa: Fix error condition for multisample proxy texture targets</li>
+  <li>i965: Put an assertion to check valid varying_to_slot[varying]</li>
+  <li>i965: Fix component mask and varying_to_slot mapping for gl_Layer</li>
+  <li>i965: Fix component mask and varying_to_slot mapping for gl_ViewportIndex</li>
+  <li>mesa: Add helper function _mesa_is_format_integer()</li>
+  <li>mesa: Add error condition for integer formats in glGetTexImage()</li>
+  <li>mesa: Add an error condition in glGetFramebufferAttachmentParameteriv()</li>
+  <li>mesa: Fix error code generation in glReadPixels()</li>
+  <li>glsl: Allow overlapping locations for vertex input attributes</li>
+  <li>mesa: Fix querying location of nth element of an array variable</li>
+  <li>mesa: Use location VERT_ATTRIB_GENERIC0 for vertex attribute 0</li>
+  <li>glsl: Compile error if fs defines conflicting qualifiers for gl_FragCoord</li>
+  <li>glsl: Compile error if fs uses gl_FragCoord before first redeclaration</li>
+  <li>mesa: Add entry for extension ARB_texture_stencil8</li>
+  <li>mesa: Add error condition for format=STENCIL_INDEX in glGetTexImage()</li>
+  <li>i965: Fix crash in do_blit_readpixels()</li>
+  <li>mesa: Add missing types in _mesa_texstore_xx_xx() functions</li>
+  <li>mesa: Allow srcFormat=GL_DEPTH_STENCIL in _mesa_texstore_xx_xx() functions</li>
+  <li>mesa: Add new helper function _mesa_unpack_depth_stencil_row()</li>
+  <li>mesa: Add support to unpack depth-stencil texture in to FLOAT_32_UNSIGNED_INT_24_8_REV</li>
+  <li>mesa: Allow FLOAT_32_UNSIGNED_INT_24_8_REV in get_tex_depth_stencil()</li>
+  <li>i965: Add glBlitFramebuffer to commands affected by conditional rendering</li>
+  <li>glsl: Use switch to allow adding more shader types</li>
+  <li>glsl: Link error if fs defines conflicting qualifiers for gl_FragCoord</li>
+  <li>glsl: Apply the link error conditions to GL_ARB_fragment_coord_conventions</li>
+</ul>
+
+<p>Benjamin Bellec (1):</p>
+<ul>
+  <li>mesa: fix GetStringi error message with correct function name</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>swrast: allocate swrast_texture_image::ImageSlices array if needed</li>
+</ul>
+
+<p>Carl Worth (4):</p>
+<ul>
+  <li>docs: Add the MD5 sums for the 10.1.1 release tar files.</li>
+  <li>cherry-ignore: Ignore a patch causing a regression</li>
+  <li>cherry-ignore: Drop an ignored patch now that piglit has been updated.</li>
+  <li>Update VERSION to 10.1.2</li>
+</ul>
+
+<p>Chris Forbes (1):</p>
+<ul>
+  <li>glsl: Only allow `invariant` on shader in/out between stages.</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>i965: Fix render-to-texture in non-FinishRenderTexture cases.</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>dri3: Enable GLX_MESA_query_renderer on DRI3 too</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>i965: Don't enable reset notification support on Gen4-5.</li>
+  <li>i965: Actually emit PIPELINE_SELECT and 3DSTATE_VF_STATISTICS.</li>
+</ul>
+
+<p>Marek Olšák (10):</p>
+<ul>
+  <li>r300g: don't crash when getting NULL colorbuffers</li>
+  <li>st/mesa: remove trailing NULL colorbuffers</li>
+  <li>r600g: fix edge flags and layered rendering on R600-R700</li>
+  <li>r600g: disable async DMA on R700</li>
+  <li>r600g: fix MSAA resolve on R6xx when the destination is 1D-tiled</li>
+  <li>r600g: fix flushing on RV670, RS780, RS880 again</li>
+  <li>r600g: fix buffer copying on R600-R700</li>
+  <li>r600g: fix for broken CULL_FRONT behavior on R6xx</li>
+  <li>r600g: fix for an MSAA hang on RV770</li>
+  <li>r600g: fix hang on RV740 by using DX_RASTERIZATION_KILL instead of SX_MISC</li>
+</ul>
+
+<p>Michel Dänzer (2):</p>
+<ul>
+  <li>r600g: Disable LLVM by default at runtime for graphics</li>
+  <li>st/mesa: Fix NULL pointer dereference for incomplete framebuffers</li>
+</ul>
+
+<p>Neil Roberts (1):</p>
+<ul>
+  <li>wayland: Fix the logic in disabling the prime capability</li>
+</ul>
+
+<p>Samuel Iglesias Gonsalvez (1):</p>
+<ul>
+  <li>mesa: fix check for dummy renderbuffer in _mesa_FramebufferRenderbufferEXT()</li>
+</ul>
+
+<p>Thomas Hellstrom (1):</p>
+<ul>
+  <li>st/xa: Cache render target surface</li>
+</ul>
+
+<p>nick (1):</p>
+<ul>
+  <li>swrast: Fix vertex color in _swsetup_Translate()</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.1.3.html
+++ b/docs/relnotes/10.1.3.html
@@ -0,0 +1,90 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.1.3 Release Notes / (May 9, 2014)</h1>
+
+<p>
+Mesa 10.1.3 is a bug fix release which fixes bugs found since the 10.1.2 release.
+</p>
+
+<p>
+Note: Mesa 10.1.3 is being released sooner than originally scheduled to make
+available a fix for a performance rgression that was inadvertently introduced
+to Mesa 10.1.2. The performance regression is reported to make vmware
+swapbuffers fall back to software. 
+</p>
+
+<p>
+Mesa 10.1.3 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+665fe1656aaa2c37b32042068aff92cb  MesaLib-10.1.3.tar.gz
+ba6dbe2b9cab0b4de840c996b9b6a3ad  MesaLib-10.1.3.tar.bz2
+4e6f26330a63d3c47e62ac4bdead39e8  MesaLib-10.1.3.zip
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77245">Bug 77245</a> - Bogus GL_ARB_explicit_attrib_location layout identifier warnings</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Carl Worth (3):</p>
+<ul>
+  <li>docs: Add MD5 sums for Mesa 10.1.2</li>
+  <li>get-pick-list.sh: Require explicit "10.1" for nominating stable patches</li>
+  <li>VERSION: Update to 10.1.3</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>mesa: Fix MaxNumLayers for 1D array textures.</li>
+  <li>i965: Fix depth (array slices) computation for 1D_ARRAY render targets.</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>glsl: fix bogus layout qualifier warnings</li>
+</ul>
+
+<p>Thomas Hellstrom (1):</p>
+<ul>
+  <li>st/xa: Fix performance regression introduced by commit "Cache render target surface"</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.1.4.html
+++ b/docs/relnotes/10.1.4.html
@@ -0,0 +1,100 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.1.4 Release Notes / (May 20, 2014)</h1>
+
+<p>
+Mesa 10.1.4 is a bug fix release which fixes bugs found since the 10.1.3 release.
+</p>
+
+<p>
+Mesa 10.1.4 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+e934365d77f384bfaec844999440bef8  MesaLib-10.1.4.tar.gz
+6fddee101f49b7409cd29994c34ddee7  MesaLib-10.1.4.tar.bz2
+ba5f48e7d5e373922c804c2651fec6c1  MesaLib-10.1.4.zip
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78225">Bug 78225</a> - Compile error due to undefined reference to `gbm_dri_backend', fix attached</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78537">Bug 78537</a> - no anisotropic filtering in a native Half-Life 2</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>mesa: fix double-freeing of dispatch tables inside glBegin/End.</li>
+</ul>
+
+<p>Carl Worth (3):</p>
+<ul>
+  <li>docs: Add MD5 sums for 10.1.3</li>
+  <li>cherry-ignore: Roland and Michel agreed to drop these patches.</li>
+  <li>VERSION: Update to 10.1.4</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>configure: error out if building GBM without dri</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>i965/vs: Use samplers for UBOs in the VS like we do for non-UBO pulls.</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>nv50/ir: make sure to reverse cond codes on all the OP_SET variants</li>
+  <li>nv50: fix setting of texture ms info to be per-stage</li>
+  <li>nv50/ir: fix integer mul lowering for u32 x u32 -&gt; high u32</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>radeonsi: Fix anisotropic filtering state setup</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>configure.ac: Add LLVM_VERSION_PATCH to DEFINES</li>
+  <li>radeonsi: Enable geometry shaders with LLVM 3.4.1</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.1.5.html
+++ b/docs/relnotes/10.1.5.html
@@ -0,0 +1,105 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.1.5 Release Notes / (June 6, 2014)</h1>
+
+<p>
+Mesa 10.1.5 is a bug fix release which fixes bugs found since the 10.1.4 release.
+</p>
+
+<p>
+Mesa 10.1.5 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+<h2>SHA256 checksums</h2>
+<pre>
+b0aceaa75bc9a9b2d9215a113e2ad488b5cf85c99005a7624f8cf7c37c5d0eaa  MesaLib-10.1.5.tar.gz
+bc6c5ec7836f254a49d055a29d9aa34c97c54c038f47ad3a00fa57a5fef15bbc  MesaLib-10.1.5.tar.bz2
+78b7255cab0af7918945452a84de7989096ebcdd27e99b31c56c0589274cbc77  MesaLib-10.1.5.zip
+</pre>
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79115">Bug 79115</a> - </li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79421">Bug 79421</a> - </li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>glsl: fix use-after free bug/crash in ast_declarator_list::hir()</li>
+</ul>
+
+<p>Carl Worth (5):</p>
+<ul>
+  <li>docs: Add md5sums for 10.1.4 release</li>
+  <li>Merge remote-tracking branch 'origin/10.1' into 10.1</li>
+  <li>cherry-ignore: Ignore two commits.</li>
+  <li>Ignore a patch that is not needed for the 10.1 branch.</li>
+  <li>Update version to 10.1.5</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>glx: do not leak dri3Display</li>
+</ul>
+
+<p>Ilia Mirkin (2):</p>
+<ul>
+  <li>nv50/ir: fix s32 x s32 -&gt; high s32 multiply logic</li>
+  <li>nv50/ir: fix constant folding for OP_MUL subop HIGH</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>mesa: Fix unbinding GL_DEPTH_STENCIL_ATTACHMENT</li>
+</ul>
+
+<p>Jeremy Huddleston Sequoia (2):</p>
+<ul>
+  <li>glapi: Avoid heap corruption in _glapi_table</li>
+  <li>darwin: Fix test for kCGLPFAOpenGLProfile support at runtime</li>
+</ul>
+
+<p>Pavel Popov (2):</p>
+<ul>
+  <li>i965: Properly return *RESET* status in glGetGraphicsResetStatusARB</li>
+  <li>i965: Fix Line Stipple enable bit in 3DSTATE_SF for Haswell.</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>llvmpipe: fix crash when not all attachments are populated in a fb</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.1.6.html
+++ b/docs/relnotes/10.1.6.html
@@ -0,0 +1,138 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.1.6 Release Notes / (June 24, 2014)</h1>
+
+<p>
+Mesa 10.1.6 is a bug fix release which fixes bugs found since the 10.1.5 release.
+</p>
+
+<p>
+Mesa 10.1.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+<h2>SHA256 checksums</h2>
+<pre>
+cde60e06b340d7598802fe4a4484b3fb8befd714f9ab9caabe1f27d3149e8815  MesaLib-10.1.6.tar.bz2
+e4e726d7805a442f7ed07d12f71335e6126796ec85328a5989eb5348a8042d00  MesaLib-10.1.6.tar.gz
+bf7e3f721a7ad0c2057a034834b6fea688e64f26a66cf8d1caa2827e405e72dd  MesaLib-10.1.6.zip
+</pre>
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=54372">Bug 54372</a> - GLX_INTEL_swap_event crashes driver when swapping window buffers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74005">Bug 74005</a> - [i965 Bisected]Piglit/glx_glx-make-glxdrawable-current fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78581">Bug 78581</a> - </li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79729">Bug 79729</a> - [i965] glClear on a multisample texture doesn't work</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Adrian Negreanu (7):</p>
+<ul>
+  <li>add megadriver_stub_FILES</li>
+  <li>android: adapt to the megadriver mechanism</li>
+  <li>android: add libloader to libGLES_mesa and libmesa_egl_dri2</li>
+  <li>android: add src/gallium/auxiliary as include path for libmesa_dricore</li>
+  <li>android, egl: add correct drm include for libmesa_egl_dri2</li>
+  <li>android, mesa_gen_matypes: pull in timespec POSIX definition</li>
+  <li>android, dricore: undefined reference to _mesa_streaming_load_memcpy</li>
+</ul>
+
+<p>Beren Minor (1):</p>
+<ul>
+  <li>egl/main: Fix eglMakeCurrent when releasing context from current thread.</li>
+</ul>
+
+<p>Carl Worth (3):</p>
+<ul>
+  <li>docs: Add SHA256 checksums for the 10.1.5 release</li>
+  <li>cherry-ignore: Add a patch to ignore</li>
+  <li>Update VERSION to 10.1.6</li>
+</ul>
+
+<p>Daniel Manjarres (1):</p>
+<ul>
+  <li>glx: Don't crash on swap event for a Window (non-GLXWindow)</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>configure: error out when building opencl without LLVM</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Copy Geom.UsesEndPrimitive when cloning a geometry program.</li>
+</ul>
+
+<p>José Fonseca (3):</p>
+<ul>
+  <li>mesa/main: Make get_hash.c values constant.</li>
+  <li>mesa: Make glGetIntegerv(GL_*_ARRAY_SIZE) return GL_BGRA.</li>
+  <li>mesa/main: Prevent sefgault on glGetIntegerv(GL_ATOMIC_COUNTER_BUFFER_BINDING).</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>mesa: Remove glClear optimization based on drawable size</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>configure: Only check for OpenCL without LLVM when the latter is certain</li>
+</ul>
+
+<p>Neil Roberts (1):</p>
+<ul>
+  <li>i965: Set the fast clear color value for texture surfaces</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>draw: (trivial) fix clamping of viewport index</li>
+</ul>
+
+<p>Tobias Klausmann (1):</p>
+<ul>
+  <li>nv50/ir: clear subop when folding constant expressions</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>clover: Prevent Clang from printing number of errors and warnings to stderr.</li>
+  <li>clover: Don't use llvm's global context</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/include/c11/threads_posix.h
+++ b/include/c11/threads_posix.h
@@ -27,7 +27,9 @@
 * DEALINGS IN THE SOFTWARE.
 */
 #include <stdlib.h>
+#ifndef assert
 #include <assert.h>
+#endif
 #include <limits.h>
 #include <errno.h>
 #include <unistd.h>
--- a/include/c11/threads_win32.h
+++ b/include/c11/threads_win32.h
@@ -26,7 +26,9 @@
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
+#ifndef assert
 #include <assert.h>
+#endif
 #include <limits.h>
 #include <errno.h>
 #include <process.h>  // MSVCRT
@@ -146,7 +148,7 @@ static unsigned __stdcall impl_thrd_routine(void *p)

 static DWORD impl_xtime2msec(const xtime *xt)
 {
-    return (DWORD)((xt->sec * 1000u) + (xt->nsec / 1000));
+    return (DWORD)((xt->sec * 1000U) + (xt->nsec / 1000000L));
 }

 #ifdef EMULATED_THREADS_USE_NATIVE_CALL_ONCE
@@ -492,12 +494,42 @@ thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
    return thrd_success;
 }

+#if 0
 // 7.25.5.2
 static inline thrd_t
 thrd_current(void)
 {
-    return GetCurrentThread();
+    HANDLE hCurrentThread;
+    BOOL bRet;
+
+    /* GetCurrentThread() returns a pseudo-handle, which is useless.  We need
+     * to call DuplicateHandle to get a real handle.  However the handle value
+     * will not match the one returned by thread_create.
+     *
+     * Other potential solutions would be:
+     * - define thrd_t as a thread Ids, but this would mean we'd need to OpenThread for many operations
+     * - use malloc'ed memory for thrd_t. This would imply using TLS for current thread.
+     *
+     * Neither is particularly nice.
+     *
+     * Life would be much easier if C11 threads had different abstractions for
+     * threads and thread IDs, just like C++11 threads does...
+     */
+
+    bRet = DuplicateHandle(GetCurrentProcess(), // source process (pseudo) handle
+                           GetCurrentThread(), // source (pseudo) handle
+                           GetCurrentProcess(), // target process
+                           &hCurrentThread, // target handle
+                           0,
+                           FALSE,
+                           DUPLICATE_SAME_ACCESS);
+    assert(bRet);
+    if (!bRet) {
+	hCurrentThread = GetCurrentThread();
+    }
+    return hCurrentThread;
 }
+#endif

 // 7.25.5.3
 static inline int
@@ -511,7 +543,7 @@ thrd_detach(thrd_t thr)
 static inline int
 thrd_equal(thrd_t thr0, thrd_t thr1)
 {
-    return (thr0 == thr1);
+    return GetThreadId(thr0) == GetThreadId(thr1);
 }

 // 7.25.5.5
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -269,6 +269,11 @@ def generate(env):
            cppdefines += ['HAVE_ALIAS']
        else:
            cppdefines += ['GLX_ALIAS_UNSUPPORTED']
+    if env['platform'] == 'haiku':
+        cppdefines += [
+            'HAVE_PTHREAD',
+            'HAVE_POSIX_MEMALIGN'
+        ]
    if platform == 'windows':
        cppdefines += [
            'WIN32',
--- a/src/egl/drivers/dri2/Android.mk
+++ b/src/egl/drivers/dri2/Android.mk
@@ -40,8 +40,12 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/mapi \
 	$(MESA_TOP)/src/egl/main \
 	$(MESA_TOP)/src/loader \
+	$(DRM_TOP)/include/drm \
 	$(DRM_GRALLOC_TOP)

+LOCAL_STATIC_LIBRARIES := \
+	libloader
+
 LOCAL_MODULE := libmesa_egl_dri2

 include $(MESA_COMMON_MK)
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -626,7 +626,6 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
      return dri2_initialize_x11(drv, disp);
 #endif

-#ifdef HAVE_LIBUDEV
 #ifdef HAVE_DRM_PLATFORM
   case _EGL_PLATFORM_DRM:
      if (disp->Options.TestOnly)
@@ -639,7 +638,6 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
         return EGL_TRUE;
      return dri2_initialize_wayland(drv, disp);
 #endif
-#endif
 #ifdef HAVE_ANDROID_PLATFORM
   case _EGL_PLATFORM_ANDROID:
      if (disp->Options.TestOnly)
@@ -1894,10 +1892,12 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp,
   if (!dri2_dpy->wl_server_drm)
 	   return EGL_FALSE;

+#ifdef HAVE_DRM_PLATFORM
   /* We have to share the wl_drm instance with gbm, so gbm can convert
    * wl_buffers to gbm bos. */
   if (dri2_dpy->gbm_dri)
      dri2_dpy->gbm_dri->wl_drm = dri2_dpy->wl_server_drm;
+#endif

   return EGL_TRUE;
 }
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -458,7 +458,12 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)

   gbm = disp->PlatformDisplay;
   if (gbm == NULL) {
-      fd = open("/dev/dri/card0", O_RDWR);
+      char buf[64];
+      int n = snprintf(buf, sizeof(buf), DRM_DEV_NAME, DRM_DIR_NAME, 0);
+      if (n != -1 && n < sizeof(buf))
+         fd = open(buf, O_RDWR);
+      if (fd < 0)
+         fd = open("/dev/dri/card0", O_RDWR);
      dri2_dpy->own_device = 1;
      gbm = gbm_create_device(fd);
      if (gbm == NULL)
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1045,7 +1045,7 @@ dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)

   if (dri2_dpy->image->base.version < 7 ||
       dri2_dpy->image->createImageFromFds == NULL)
-      dri2_dpy->capabilities &= WL_DRM_CAPABILITY_PRIME;
+      dri2_dpy->capabilities &= ~WL_DRM_CAPABILITY_PRIME;

   types = EGL_WINDOW_BIT;
   for (i = 0; dri2_dpy->driver_configs[i]; i++) {
--- a/src/egl/main/Android.mk
+++ b/src/egl/main/Android.mk
@@ -154,11 +154,14 @@ LOCAL_STATIC_LIBRARIES := \
 	libmesa_glsl \
 	libmesa_glsl_utils \
 	libmesa_gallium \
-	libloader \
 	$(LOCAL_STATIC_LIBRARIES)

 endif # MESA_BUILD_GALLIUM

+LOCAL_STATIC_LIBRARIES := \
+	$(LOCAL_STATIC_LIBRARIES) \
+	libloader
+
 LOCAL_MODULE := libGLES_mesa
 LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl

--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -490,8 +490,12 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,
   if (!context && ctx != EGL_NO_CONTEXT)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONTEXT, EGL_FALSE);
   if (!draw_surf || !read_surf) {
-      /* surfaces may be NULL if surfaceless */
-      if (!disp->Extensions.KHR_surfaceless_context)
+      /* From the EGL 1.4 (20130211) spec:
+       *
+       *    To release the current context without assigning a new one, set ctx
+       *    to EGL_NO_CONTEXT and set draw and read to EGL_NO_SURFACE.
+       */
+      if (!disp->Extensions.KHR_surfaceless_context && ctx != EGL_NO_CONTEXT)
         RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);

      if ((!draw_surf && draw != EGL_NO_SURFACE) ||
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -91,6 +91,7 @@ C_SOURCES := \
 	translate/translate_sse.c \
 	util/u_debug.c \
 	util/u_debug_describe.c \
+	util/u_debug_flush.c \
 	util/u_debug_memory.c \
 	util/u_debug_refcnt.c \
 	util/u_debug_stack.c \
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1187,11 +1187,12 @@ cso_set_sampler_views(struct cso_context *ctx,
      pipe_sampler_view_reference(&info->views[i], NULL);
   }

-   info->nr_views = count;
-
   /* bind the new sampler views */
-   ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, count,
+   ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
+                                MAX2(info->nr_views, count),
                                info->views);
+
+   info->nr_views = count;
 }


--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -673,7 +673,7 @@ pstip_create_fs_state(struct pipe_context *pipe,
   struct pstip_fragment_shader *pstipfs = CALLOC_STRUCT(pstip_fragment_shader);

   if (pstipfs) {
-      pstipfs->state = *fs;
+      pstipfs->state.tokens = tgsi_dup_tokens(fs->tokens);

      /* pass-through */
      pstipfs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs);
@@ -707,6 +707,7 @@ pstip_delete_fs_state(struct pipe_context *pipe, void *fs)
   if (pstipfs->pstip_fs)
      pstip->driver_delete_fs_state(pstip->pipe, pstipfs->pstip_fs);

+   FREE((void*)pstipfs->state.tokens);
   FREE(pstipfs);
 }

--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -495,7 +495,7 @@ draw_stats_clipper_primitives(struct draw_context *draw,
 static INLINE unsigned
 draw_clamp_viewport_idx(int idx)
 {
-   return ((PIPE_MAX_VIEWPORTS > idx || idx < 0) ? idx : 0);
+   return ((PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0);
 }

 /**
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -161,7 +161,9 @@ pb_slab_range_manager_create(struct pb_manager *provider,
 */
 struct pb_manager *
 pb_cache_manager_create(struct pb_manager *provider, 
-                     	unsigned usecs); 
+                        unsigned usecs,
+                        float size_factor,
+                        unsigned bypass_usage);


 struct pb_fence_ops;
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -82,6 +82,8 @@ struct pb_cache_manager
   
   struct list_head delayed;
   pb_size numDelayed;
+   float size_factor;
+   unsigned bypass_usage;
 };


@@ -227,11 +229,14 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
                          pb_size size,
                          const struct pb_desc *desc)
 {
+   if (desc->usage & buf->mgr->bypass_usage)
+      return 0;
+
   if(buf->base.size < size)
      return 0;

   /* be lenient with size */
-   if(buf->base.size >= 2*size)
+   if(buf->base.size > (unsigned) (buf->mgr->size_factor * size))
      return 0;
   
   if(!pb_check_alignment(desc->alignment, buf->base.alignment))
@@ -338,7 +343,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
   
   assert(pipe_is_referenced(&buf->buffer->reference));
   assert(pb_check_alignment(desc->alignment, buf->buffer->alignment));
-   assert(pb_check_usage(desc->usage, buf->buffer->usage));
+   assert(pb_check_usage(desc->usage & ~mgr->bypass_usage, buf->buffer->usage));
   assert(buf->buffer->size >= size);
   
   pipe_reference_init(&buf->base.reference, 1);
@@ -384,10 +389,23 @@ pb_cache_manager_destroy(struct pb_manager *mgr)
   FREE(mgr);
 }

-
+/**
+ * Create a caching buffer manager
+ *
+ * @param provider The buffer manager to which cache miss buffer requests
+ * should be redirected.
+ * @param usecs Unused buffers may be released from the cache after this
+ * time
+ * @param size_factor Declare buffers that are size_factor times bigger than
+ * the requested size as cache hits.
+ * @param bypass_usage Bitmask. If (requested usage & bypass_usage) != 0,
+ * buffer allocation requests are redirected to the provider.
+ */
 struct pb_manager *
 pb_cache_manager_create(struct pb_manager *provider, 
-                     	unsigned usecs) 
+                        unsigned usecs,
+                        float size_factor,
+                        unsigned bypass_usage)
 {
   struct pb_cache_manager *mgr;

@@ -403,6 +421,8 @@ pb_cache_manager_create(struct pb_manager *provider,
   mgr->base.flush = pb_cache_manager_flush;
   mgr->provider = provider;
   mgr->usecs = usecs;
+   mgr->size_factor = size_factor;
+   mgr->bypass_usage = bypass_usage;
   LIST_INITHEAD(&mgr->delayed);
   mgr->numDelayed = 0;
   pipe_mutex_init(mgr->mutex);
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -364,6 +364,8 @@ void util_blitter_destroy(struct blitter_context *blitter)
   pipe->delete_vs_state(pipe, ctx->vs);
   if (ctx->vs_pos_only)
      pipe->delete_vs_state(pipe, ctx->vs_pos_only);
+   if (ctx->vs_layered)
+      pipe->delete_vs_state(pipe, ctx->vs_layered);
   pipe->delete_vertex_elements_state(pipe, ctx->velem_state);
   for (i = 0; i < 4; i++) {
      if (ctx->velem_state_readbuf[i]) {
--- a/src/gallium/auxiliary/util/u_debug_flush.c
+++ b/src/gallium/auxiliary/util/u_debug_flush.c
@@ -0,0 +1,391 @@
+/**************************************************************************
+ *
+ * Copyright 2012 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * u_debug_flush.c Debug flush and map-related issues:
+ * - Flush while synchronously mapped.
+ * - Command stream reference while synchronously mapped.
+ * - Synchronous map while referenced on command stream.
+ * - Recursive maps.
+ * - Unmap while not mapped.
+ *
+ * @author Thomas Hellstrom <thellstrom@vmware.com>
+ */
+
+#ifdef DEBUG
+#include "pipe/p_compiler.h"
+#include "util/u_debug_stack.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_debug_flush.h"
+#include "util/u_hash_table.h"
+#include "util/u_double_list.h"
+#include "util/u_inlines.h"
+#include "os/os_thread.h"
+#include <stdio.h>
+
+struct debug_flush_buf {
+   /* Atomic */
+   struct pipe_reference reference; /* Must be the first member. */
+   pipe_mutex mutex;
+   /* Immutable */
+   boolean supports_unsync;
+   unsigned bt_depth;
+   /* Protected by mutex */
+   boolean mapped;
+   boolean mapped_sync;
+   struct debug_stack_frame *map_frame;
+};
+
+struct debug_flush_item {
+   struct debug_flush_buf *fbuf;
+   unsigned bt_depth;
+   struct debug_stack_frame *ref_frame;
+};
+
+struct debug_flush_ctx {
+   /* Contexts are used by a single thread at a time */
+   unsigned bt_depth;
+   boolean catch_map_of_referenced;
+   struct util_hash_table *ref_hash;
+   struct list_head head;
+};
+
+pipe_static_mutex(list_mutex);
+static struct list_head ctx_list = {&ctx_list, &ctx_list};
+
+static struct debug_stack_frame *
+debug_flush_capture_frame(int start, int depth)
+{
+   struct debug_stack_frame *frames;
+
+   frames = CALLOC(depth, sizeof(*frames));
+   if (!frames)
+      return NULL;
+
+   debug_backtrace_capture(frames, start, depth);
+   return frames;
+}
+
+static int
+debug_flush_pointer_compare(void *key1, void *key2)
+{
+   return (key1 == key2) ? 0 : 1;
+}
+
+static unsigned
+debug_flush_pointer_hash(void *key)
+{
+   return (unsigned) (unsigned long) key;
+}
+
+struct debug_flush_buf *
+debug_flush_buf_create(boolean supports_unsync, unsigned bt_depth)
+{
+   struct debug_flush_buf *fbuf = CALLOC_STRUCT(debug_flush_buf);
+
+   if (!fbuf)
+      goto out_no_buf;
+
+   fbuf->supports_unsync = supports_unsync;
+   fbuf->bt_depth = bt_depth;
+   pipe_reference_init(&fbuf->reference, 1);
+   pipe_mutex_init(fbuf->mutex);
+
+   return fbuf;
+out_no_buf:
+   debug_printf("Debug flush buffer creation failed.\n");
+   debug_printf("Debug flush checking for this buffer will be incomplete.\n");
+   return NULL;
+}
+
+void
+debug_flush_buf_reference(struct debug_flush_buf **dst,
+                          struct debug_flush_buf *src)
+{
+   struct debug_flush_buf *fbuf = *dst;
+
+   if (pipe_reference(&(*dst)->reference, &src->reference)) {
+      if (fbuf->map_frame)
+         FREE(fbuf->map_frame);
+
+      FREE(fbuf);
+   }
+
+   *dst = src;
+}
+
+static void
+debug_flush_item_destroy(struct debug_flush_item *item)
+{
+   debug_flush_buf_reference(&item->fbuf, NULL);
+
+   if (item->ref_frame)
+      FREE(item->ref_frame);
+
+   FREE(item);
+}
+
+struct debug_flush_ctx *
+debug_flush_ctx_create(boolean catch_reference_of_mapped, unsigned bt_depth)
+{
+   struct debug_flush_ctx *fctx = CALLOC_STRUCT(debug_flush_ctx);
+
+   if (!fctx)
+      goto out_no_ctx;
+
+   fctx->ref_hash = util_hash_table_create(debug_flush_pointer_hash,
+                                           debug_flush_pointer_compare);
+
+   if (!fctx->ref_hash)
+      goto out_no_ref_hash;
+
+   fctx->bt_depth = bt_depth;
+   pipe_mutex_lock(list_mutex);
+   list_addtail(&fctx->head, &ctx_list);
+   pipe_mutex_unlock(list_mutex);
+
+   return fctx;
+
+ out_no_ref_hash:
+   FREE(fctx);
+out_no_ctx:
+   debug_printf("Debug flush context creation failed.\n");
+   debug_printf("Debug flush checking for this context will be incomplete.\n");
+   return NULL;
+}
+
+static void
+debug_flush_alert(const char *s, const char *op,
+                  unsigned start, unsigned depth,
+                  boolean continued,
+                  boolean capture,
+                  const struct debug_stack_frame *frame)
+{
+   if (capture)
+      frame = debug_flush_capture_frame(start, depth);
+
+   if (s)
+      debug_printf("%s ", s);
+   if (frame) {
+      debug_printf("%s backtrace follows:\n", op);
+      debug_backtrace_dump(frame, depth);
+   } else
+      debug_printf("No %s backtrace was captured.\n", op);
+
+   if (continued)
+      debug_printf("**********************************\n");
+   else
+      debug_printf("*********END OF MESSAGE***********\n\n\n");
+
+   if (capture)
+      FREE((void *)frame);
+}
+
+
+void
+debug_flush_map(struct debug_flush_buf *fbuf, unsigned flags)
+{
+   boolean mapped_sync = FALSE;
+
+   if (!fbuf)
+      return;
+
+   pipe_mutex_lock(fbuf->mutex);
+   if (fbuf->mapped) {
+      debug_flush_alert("Recursive map detected.", "Map",
+                        2, fbuf->bt_depth, TRUE, TRUE, NULL);
+      debug_flush_alert(NULL, "Previous map", 0, fbuf->bt_depth, FALSE,
+                        FALSE, fbuf->map_frame);
+   } else if (!(flags & PIPE_TRANSFER_UNSYNCHRONIZED) ||
+              !fbuf->supports_unsync) {
+      fbuf->mapped_sync = mapped_sync = TRUE;
+   }
+   fbuf->map_frame = debug_flush_capture_frame(1, fbuf->bt_depth);
+   fbuf->mapped = TRUE;
+   pipe_mutex_unlock(fbuf->mutex);
+
+   if (mapped_sync) {
+      struct debug_flush_ctx *fctx;
+
+      pipe_mutex_lock(list_mutex);
+      LIST_FOR_EACH_ENTRY(fctx, &ctx_list, head) {
+         struct debug_flush_item *item =
+            util_hash_table_get(fctx->ref_hash, fbuf);
+
+         if (item && fctx->catch_map_of_referenced) {
+            debug_flush_alert("Already referenced map detected.",
+                              "Map", 2, fbuf->bt_depth, TRUE, TRUE, NULL);
+            debug_flush_alert(NULL, "Reference", 0, item->bt_depth,
+                              FALSE, FALSE, item->ref_frame);
+         }
+      }
+      pipe_mutex_unlock(list_mutex);
+   }
+}
+
+void
+debug_flush_unmap(struct debug_flush_buf *fbuf)
+{
+   if (!fbuf)
+      return;
+
+   pipe_mutex_lock(fbuf->mutex);
+   if (!fbuf->mapped)
+      debug_flush_alert("Unmap not previously mapped detected.", "Map",
+                        2, fbuf->bt_depth, FALSE, TRUE, NULL);
+
+   fbuf->mapped_sync = FALSE;
+   fbuf->mapped = FALSE;
+   if (fbuf->map_frame) {
+      FREE(fbuf->map_frame);
+      fbuf->map_frame = NULL;
+   }
+   pipe_mutex_unlock(fbuf->mutex);
+}
+
+void
+debug_flush_cb_reference(struct debug_flush_ctx *fctx,
+                         struct debug_flush_buf *fbuf)
+{
+   struct debug_flush_item *item;
+
+   if (!fctx || !fbuf)
+      return;
+
+   item = util_hash_table_get(fctx->ref_hash, fbuf);
+
+   pipe_mutex_lock(fbuf->mutex);
+   if (fbuf->mapped_sync) {
+      debug_flush_alert("Reference of mapped buffer detected.", "Reference",
+                        2, fctx->bt_depth, TRUE, TRUE, NULL);
+      debug_flush_alert(NULL, "Map", 0, fbuf->bt_depth, FALSE,
+                        FALSE, fbuf->map_frame);
+   }
+   pipe_mutex_unlock(fbuf->mutex);
+
+   if (!item) {
+      item = CALLOC_STRUCT(debug_flush_item);
+      if (item) {
+         debug_flush_buf_reference(&item->fbuf, fbuf);
+         item->bt_depth = fctx->bt_depth;
+         item->ref_frame = debug_flush_capture_frame(2, item->bt_depth);
+         if (util_hash_table_set(fctx->ref_hash, fbuf, item) != PIPE_OK) {
+            debug_flush_item_destroy(item);
+            goto out_no_item;
+         }
+         return;
+      }
+      goto out_no_item;
+   }
+   return;
+
+out_no_item:
+   debug_printf("Debug flush command buffer reference creation failed.\n");
+   debug_printf("Debug flush checking will be incomplete "
+                "for this command batch.\n");
+}
+
+static enum pipe_error
+debug_flush_might_flush_cb(void *key, void *value, void *data)
+{
+   struct debug_flush_item *item =
+      (struct debug_flush_item *) value;
+   struct debug_flush_buf *fbuf = item->fbuf;
+   const char *reason = (const char *) data;
+   char message[80];
+
+   snprintf(message, sizeof(message),
+            "%s referenced mapped buffer detected.", reason);
+
+   pipe_mutex_lock(fbuf->mutex);
+   if (fbuf->mapped_sync) {
+      debug_flush_alert(message, reason, 3, item->bt_depth, TRUE, TRUE, NULL);
+      debug_flush_alert(NULL, "Map", 0, fbuf->bt_depth, TRUE, FALSE,
+                        fbuf->map_frame);
+      debug_flush_alert(NULL, "First reference", 0, item->bt_depth, FALSE,
+                        FALSE, item->ref_frame);
+   }
+   pipe_mutex_unlock(fbuf->mutex);
+
+   return PIPE_OK;
+}
+
+void
+debug_flush_might_flush(struct debug_flush_ctx *fctx)
+{
+   if (!fctx)
+      return;
+
+   util_hash_table_foreach(fctx->ref_hash,
+                           debug_flush_might_flush_cb,
+                           "Might flush");
+}
+
+static enum pipe_error
+debug_flush_flush_cb(void *key, void *value, void *data)
+{
+   struct debug_flush_item *item =
+      (struct debug_flush_item *) value;
+
+   debug_flush_item_destroy(item);
+
+   return PIPE_OK;
+}
+
+
+void
+debug_flush_flush(struct debug_flush_ctx *fctx)
+{
+   if (!fctx)
+      return;
+
+   util_hash_table_foreach(fctx->ref_hash,
+                           debug_flush_might_flush_cb,
+                           "Flush");
+   util_hash_table_foreach(fctx->ref_hash,
+                           debug_flush_flush_cb,
+                           NULL);
+   util_hash_table_clear(fctx->ref_hash);
+}
+
+void
+debug_flush_ctx_destroy(struct debug_flush_ctx *fctx)
+{
+   if (!fctx)
+      return;
+
+   list_del(&fctx->head);
+   util_hash_table_foreach(fctx->ref_hash,
+                           debug_flush_flush_cb,
+                           NULL);
+   util_hash_table_clear(fctx->ref_hash);
+   util_hash_table_destroy(fctx->ref_hash);
+   FREE(fctx);
+}
+#endif
--- a/src/gallium/auxiliary/util/u_debug_flush.h
+++ b/src/gallium/auxiliary/util/u_debug_flush.h
@@ -0,0 +1,138 @@
+/**************************************************************************
+ *
+ * Copyright 2012 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * u_debug_flush.h - Header for debugging flush- and map- related issues.
+ * - Flush while synchronously mapped.
+ * - Command stream reference while synchronously mapped.
+ * - Synchronous map while referenced on command stream.
+ * - Recursive maps.
+ * - Unmap while not mapped.
+ *
+ * @author Thomas Hellstrom <thellstrom@vmware.com>
+ */
+#ifdef DEBUG
+
+#ifndef U_DEBUG_FLUSH_H_
+#define U_DEBUG_FLUSH_H_
+
+struct debug_flush_buf;
+struct debug_flush_ctx;
+
+/**
+ * Create a buffer (AKA allocation) representation.
+ *
+ * @param support_unsync Whether unsynchronous maps are truly supported.
+ * @param bt_depth Depth of backtrace to be captured for this buffer
+ * representation.
+ */
+struct debug_flush_buf *
+debug_flush_buf_create(boolean supports_unsync, unsigned bt_depth);
+
+/**
+ * Reference a buffer representation.
+ *
+ * @param dst Pointer copy destination
+ * @param src Pointer copy source (may be NULL).
+ *
+ * Replace a pointer to a buffer representation with proper refcounting.
+ */
+void
+debug_flush_buf_reference(struct debug_flush_buf **dst,
+                          struct debug_flush_buf *src);
+
+/**
+ * Create a context representation.
+ *
+ * @param catch_map_of_referenced Whether to catch synchronous maps of buffers
+ * already present on the command stream.
+ * @param bt_depth Depth of backtrace to be captured for this context
+ * representation.
+ */
+struct debug_flush_ctx *
+debug_flush_ctx_create(boolean catch_map_of_referenced, unsigned bt_depth);
+
+/**
+ * Destroy a context representation.
+ *
+ * @param fctx The context representation to destroy.
+ */
+void
+debug_flush_ctx_destroy(struct debug_flush_ctx *fctx);
+
+/**
+ * Map annotation
+ *
+ * @param fbuf The buffer representation to map.
+ * @param flags Pipebuffer flags for the map.
+ *
+ * Used to annotate a map of the buffer described by the buffer representation.
+ */
+void debug_flush_map(struct debug_flush_buf *fbuf, unsigned flags);
+
+/**
+ * Unmap annotation
+ *
+ * @param fbuf The buffer representation to map.
+ *
+ * Used to annotate an unmap of the buffer described by the
+ * buffer representation.
+ */
+void debug_flush_unmap(struct debug_flush_buf *fbuf);
+
+/**
+ * Might flush annotation
+ *
+ * @param fctx The context representation that might be flushed.
+ *
+ * Used to annotate a conditional (possible) flush of the given context.
+ */
+void debug_flush_might_flush(struct debug_flush_ctx *fctx);
+
+/**
+ * Flush annotation
+ *
+ * @param fctx The context representation that is flushed.
+ *
+ * Used to annotate a real flush of the given context.
+ */
+void debug_flush_flush(struct debug_flush_ctx *fctx);
+
+
+/**
+ * Flush annotation
+ *
+ * @param fctx The context representation that is flushed.
+ *
+ * Used to annotate a real flush of the given context.
+ */
+void debug_flush_cb_reference(struct debug_flush_ctx *fctx,
+                              struct debug_flush_buf *fbuf);
+
+#endif
+#endif
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1382,7 +1382,7 @@ get_next_slot(struct gen_mipmap_state *ctx)
 static unsigned
 set_vertex_data(struct gen_mipmap_state *ctx,
                enum pipe_texture_target tex_target,
-                uint layer, float r)
+                uint face, float r)
 {
   unsigned offset;

@@ -1403,14 +1403,21 @@ set_vertex_data(struct gen_mipmap_state *ctx,
   ctx->vertices[3][0][1] = 1.0f;

   /* Setup vertex texcoords.  This is a little tricky for cube maps. */
-   if (tex_target == PIPE_TEXTURE_CUBE) {
+   if (tex_target == PIPE_TEXTURE_CUBE ||
+       tex_target == PIPE_TEXTURE_CUBE_ARRAY) {
      static const float st[4][2] = {
         {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
      };

-      util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2,
+      util_map_texcoords2d_onto_cubemap(face, &st[0][0], 2,
                                        &ctx->vertices[0][1][0], 8,
                                        FALSE);
+
+      /* set the layer for cube arrays */
+      ctx->vertices[0][1][3] = r;
+      ctx->vertices[1][1][3] = r;
+      ctx->vertices[2][1][3] = r;
+      ctx->vertices[3][1][3] = r;
   }
   else if (tex_target == PIPE_TEXTURE_1D_ARRAY) {
      /* 1D texture array  */
@@ -1520,29 +1527,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
   assert(filter == PIPE_TEX_FILTER_LINEAR ||
          filter == PIPE_TEX_FILTER_NEAREST);

-   switch (pt->target) {
-   case PIPE_TEXTURE_1D:
-      type = TGSI_TEXTURE_1D;
-      break;
-   case PIPE_TEXTURE_2D:
-      type = TGSI_TEXTURE_2D;
-      break;
-   case PIPE_TEXTURE_3D:
-      type = TGSI_TEXTURE_3D;
-      break;
-   case PIPE_TEXTURE_CUBE:
-      type = TGSI_TEXTURE_CUBE;
-      break;
-   case PIPE_TEXTURE_1D_ARRAY:
-      type = TGSI_TEXTURE_1D_ARRAY;
-      break;
-   case PIPE_TEXTURE_2D_ARRAY:
-      type = TGSI_TEXTURE_2D_ARRAY;
-      break;
-   default:
-      assert(0);
-      type = TGSI_TEXTURE_2D;
-   }
+   type = util_pipe_tex_to_tgsi_tex(pt->target, 1);

   /* check if we can render in the texture's format */
   if (!screen->is_format_supported(screen, psv->format, pt->target,
@@ -1600,7 +1585,9 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,

      if (pt->target == PIPE_TEXTURE_3D)
         nr_layers = u_minify(pt->depth0, dstLevel);
-      else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY)
+      else if (pt->target == PIPE_TEXTURE_2D_ARRAY ||
+               pt->target == PIPE_TEXTURE_1D_ARRAY ||
+               pt->target == PIPE_TEXTURE_CUBE_ARRAY)
 	 nr_layers = pt->array_size;
      else
         nr_layers = 1;
@@ -1613,9 +1600,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
            layer = i;
            /* XXX hmm really? */
            rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2);
-         } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) {
+         } else if (pt->target == PIPE_TEXTURE_2D_ARRAY ||
+                    pt->target == PIPE_TEXTURE_1D_ARRAY) {
 	    layer = i;
 	    rcoord = (float)layer;
+         } else if (pt->target == PIPE_TEXTURE_CUBE_ARRAY) {
+            layer = i;
+            face = layer % 6;
+            rcoord = layer / 6;
 	 } else
            layer = face;

--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -112,10 +112,13 @@ static INLINE float logf( float f )
 #define logf(x) ((float)log((double)(x)))
 #endif /* logf */

+#if _MSC_VER < 1800
 #define isfinite(x) _finite((double)(x))
 #define isnan(x) _isnan((double)(x))
+#endif /* _MSC_VER < 1800 */
 #endif /* _MSC_VER < 1400 && !defined(__cplusplus) */

+#if _MSC_VER < 1800
 static INLINE double log2( double x )
 {
   const double invln2 = 1.442695041;
@@ -133,6 +136,7 @@ roundf(float x)
 {
   return x >= 0.0f ? floorf(x + 0.5f) : ceilf(x - 0.5f);
 }
+#endif

 #define INFINITY (DBL_MAX + DBL_MAX)
 #define NAN (INFINITY - INFINITY)
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -376,9 +376,15 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,

         /* color buffer */
         for (i = 0; i < scene->fb.nr_cbufs; i++){
-            stride[i] = scene->cbufs[i].stride;
-            color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x,
-                                                                  tile_y + y, inputs->layer);
+            if (scene->fb.cbufs[i]) {
+               stride[i] = scene->cbufs[i].stride;
+               color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x,
+                                                                     tile_y + y, inputs->layer);
+            }
+            else {
+               stride[i] = 0;
+               color[i] = NULL;
+            }
         }

         /* depth buffer */
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -37,18 +37,25 @@ namespace nv50_ir {
 //    ah*bl 00
 //
 // fffe0001 + fffe0001
+//
+// Note that this sort of splitting doesn't work for signed values, so we
+// compute the sign on those manually and then perform an unsigned multiply.
 static bool
 expandIntegerMUL(BuildUtil *bld, Instruction *mul)
 {
   const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;

-   DataType fTy = mul->sType; // full type
-   DataType hTy;
+   DataType fTy; // full type
+   switch (mul->sType) {
+   case TYPE_S32: fTy = TYPE_U32; break;
+   case TYPE_S64: fTy = TYPE_U64; break;
+   default: fTy = mul->sType; break;
+   }
+
+   DataType hTy; // half type
   switch (fTy) {
-   case TYPE_S32: hTy = TYPE_S16; break;
   case TYPE_U32: hTy = TYPE_U16; break;
   case TYPE_U64: hTy = TYPE_U32; break;
-   case TYPE_S64: hTy = TYPE_S32; break;
   default:
      return false;
   }
@@ -59,15 +66,25 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)

   bld->setPosition(mul, true);

+   Value *s[2];
   Value *a[2], *b[2];
-   Value *c[2];
   Value *t[4];
   for (int j = 0; j < 4; ++j)
      t[j] = bld->getSSA(fullSize);

+   s[0] = mul->getSrc(0);
+   s[1] = mul->getSrc(1);
+
+   if (isSignedType(mul->sType)) {
+      s[0] = bld->getSSA(fullSize);
+      s[1] = bld->getSSA(fullSize);
+      bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
+      bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
+   }
+
   // split sources into halves
-   i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0));
-   i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1));
+   i[0] = bld->mkSplit(a, halfSize, s[0]);
+   i[1] = bld->mkSplit(b, halfSize, s[1]);

   i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
   i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
@@ -75,23 +92,76 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
   i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);

   if (highResult) {
-      Value *r[3];
+      Value *c[2];
+      Value *r[5];
      Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
      c[0] = bld->getSSA(1, FILE_FLAGS);
      c[1] = bld->getSSA(1, FILE_FLAGS);
-      for (int j = 0; j < 3; ++j)
+      for (int j = 0; j < 5; ++j)
         r[j] = bld->getSSA(fullSize);

      i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
      i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
-      bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[0]);
-      i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]);
+      bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]);
+      bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]);
+      i[5] = bld->mkOp3(OP_MAD, fTy, r[4], a[1], b[1], r[2]);

      // set carry defs / sources
      i[3]->setFlagsDef(1, c[0]);
-      i[4]->setFlagsDef(0, c[1]); // actual result not required, just the carry
+      // actual result required in negative case, but ignored for
+      // unsigned. for some reason the compiler ends up dropping the whole
+      // instruction if the destination is unused but the flags are.
+      if (isSignedType(mul->sType))
+         i[4]->setFlagsDef(1, c[1]);
+      else
+         i[4]->setFlagsDef(0, c[1]);
      i[6]->setPredicate(CC_C, c[0]);
      i[5]->setFlagsSrc(3, c[1]);
+
+      if (isSignedType(mul->sType)) {
+         Value *cc[2];
+         Value *rr[7];
+         Value *one = bld->getSSA(fullSize);
+         bld->loadImm(one, 1);
+         for (int j = 0; j < 7; j++)
+            rr[j] = bld->getSSA(fullSize);
+
+         // NOTE: this logic uses predicates because splitting basic blocks is
+         // ~impossible during the SSA phase. The RA relies on a correlation
+         // between edge order and phi node sources.
+
+         // Set the sign of the result based on the inputs
+         bld->mkOp2(OP_XOR, fTy, NULL, mul->getSrc(0), mul->getSrc(1))
+            ->setFlagsDef(0, (cc[0] = bld->getSSA(1, FILE_FLAGS)));
+
+         // 1s complement of 64-bit value
+         bld->mkOp1(OP_NOT, fTy, rr[0], r[4])
+            ->setPredicate(CC_S, cc[0]);
+         bld->mkOp1(OP_NOT, fTy, rr[1], t[3])
+            ->setPredicate(CC_S, cc[0]);
+
+         // add to low 32-bits, keep track of the carry
+         Instruction *n = bld->mkOp2(OP_ADD, fTy, NULL, rr[1], one);
+         n->setPredicate(CC_S, cc[0]);
+         n->setFlagsDef(0, (cc[1] = bld->getSSA(1, FILE_FLAGS)));
+
+         // If there was a carry, add 1 to the upper 32 bits
+         // XXX: These get executed even if they shouldn't be
+         bld->mkOp2(OP_ADD, fTy, rr[2], rr[0], one)
+            ->setPredicate(CC_C, cc[1]);
+         bld->mkMov(rr[3], rr[0])
+            ->setPredicate(CC_NC, cc[1]);
+         bld->mkOp2(OP_UNION, fTy, rr[4], rr[2], rr[3]);
+
+         // Merge the results from the negative and non-negative paths
+         bld->mkMov(rr[5], rr[4])
+            ->setPredicate(CC_S, cc[0]);
+         bld->mkMov(rr[6], r[4])
+            ->setPredicate(CC_NS, cc[0]);
+         bld->mkOp2(OP_UNION, mul->sType, mul->getDef(0), rr[5], rr[6]);
+      } else {
+         bld->mkMov(mul->getDef(0), r[4]);
+      }
   } else {
      bld->mkMov(mul->getDef(0), t[3]);
   }
@@ -590,6 +660,10 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
   Value *tmp = new_LValue(func, FILE_GPR);
   uint8_t b = prog->driver->io.resInfoCBSlot;
   off += prog->driver->io.suInfoBase;
+   if (prog->getType() > Program::TYPE_VERTEX)
+      off += 16 * 2 * 4;
+   if (prog->getType() > Program::TYPE_GEOMETRY)
+      off += 16 * 2 * 4;
   *ms_x = bld.mkLoadv(TYPE_U32, bld.mkSymbol(
                             FILE_MEMORY_CONST, b, TYPE_U32, off + 0), NULL);
   *ms_y = bld.mkLoadv(TYPE_U32, bld.mkSymbol(
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -666,8 +666,9 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
   const int dim = i->tex.target.getDim() + i->tex.target.isCube();
   const int arg = i->tex.target.getArgCount();
   const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
+   const int chipset = prog->getTarget()->getChipset();

-   if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) {
+   if (chipset >= NVISA_GK104_CHIPSET) {
      if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
         WARN("indirect TEX not implemented\n");
      }
@@ -697,7 +698,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
      }
   } else
   // (nvc0) generate and move the tsc/tic/array source to the front
-   if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
+   if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
      LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa

      Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
@@ -728,6 +729,13 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
      i->setSrc(0, src);
   }

+   // For nvc0, the sample id has to be in the second operand, as the offset
+   // does. Right now we don't know how to pass both in, and this case can't
+   // happen with OpenGL. On nve0, the sample id is part of the texture
+   // coordinate argument.
+   assert(chipset >= NVISA_GK104_CHIPSET ||
+          !i->tex.useOffsets || !i->tex.target.isMS());
+
   // offset is last source (lod 1st, dc 2nd)
   if (i->tex.useOffsets) {
      uint32_t value = 0;
@@ -741,7 +749,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
      i->setSrc(s, bld.loadImm(NULL, value));
   }

-   if (prog->getTarget()->getChipset() >= NVISA_GK104_CHIPSET) {
+   if (chipset >= NVISA_GK104_CHIPSET) {
      //
      // If TEX requires more than 4 sources, the 2nd register tuple must be
      // aligned to 4, even if it consists of just a single 4-byte register.
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -187,7 +187,8 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
      return;
   }

-   if (insn->op == OP_SET)
+   if (insn->op == OP_SET || insn->op == OP_SET_AND ||
+       insn->op == OP_SET_OR || insn->op == OP_SET_XOR)
      insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
   else
   if (insn->op == OP_SLCT)
@@ -417,7 +418,17 @@ ConstantFolding::expr(Instruction *i,
      case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
      case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
      case TYPE_S32:
-      case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+            res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
+            break;
+         }
+         /* fallthrough */
+      case TYPE_U32:
+         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+            res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
+            break;
+         }
+         res.data.u32 = a->data.u32 * b->data.u32; break;
      default:
         return;
      }
@@ -524,6 +535,7 @@ ConstantFolding::expr(Instruction *i,
   } else {
      i->op = OP_MOV;
   }
+   i->subOp = 0;
 }

 void
@@ -625,12 +637,41 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
 {
   const int t = !s;
   const operation op = i->op;
+   Instruction *newi = i;

   switch (i->op) {
   case OP_MUL:
      if (i->dType == TYPE_F32)
         tryCollapseChainedMULs(i, s, imm0);

+      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
+         assert(!isFloatType(i->sType));
+         if (imm0.isInteger(1) && i->dType == TYPE_S32) {
+            bld.setPosition(i, false);
+            // Need to set to the sign value, which is a compare.
+            newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
+                             TYPE_S32, i->getSrc(t), bld.mkImm(0));
+            delete_Instruction(prog, i);
+         } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
+            // The high bits can't be set in this case (either mul by 0 or
+            // unsigned by 1)
+            i->op = OP_MOV;
+            i->subOp = 0;
+            i->setSrc(0, new_ImmediateValue(prog, 0u));
+            i->src(0).mod = Modifier(0);
+            i->setSrc(1, NULL);
+         } else if (!imm0.isNegative() && imm0.isPow2()) {
+            // Translate into a shift
+            imm0.applyLog2();
+            i->op = OP_SHR;
+            i->subOp = 0;
+            imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
+            i->setSrc(0, i->getSrc(t));
+            i->src(0).mod = i->src(t).mod;
+            i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
+            i->src(1).mod = 0;
+         }
+      } else
      if (imm0.isInteger(0)) {
         i->op = OP_MOV;
         i->setSrc(0, new_ImmediateValue(prog, 0u));
@@ -721,7 +762,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
         else
            tA = tB;
         tB = s ? bld.getSSA() : i->getDef(0);
-         bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
+         newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
         if (s)
            bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));

@@ -753,7 +794,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
         tA = bld.getSSA();
         bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
         tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
-         bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
+         newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
         if (d < 0)
            bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);

@@ -831,7 +872,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
   default:
      return;
   }
-   if (i->op != op)
+   if (newi->op != op)
      foldCount++;
 }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -284,7 +284,7 @@ public:
   bool run(const std::list<ValuePair>&);

   Symbol *assignSlot(const Interval&, const unsigned int size);
-   Symbol *offsetSlot(Symbol *, const LValue *);
+   Value *offsetSlot(Value *, const LValue *);
   inline int32_t getStackSize() const { return stackSize; }

 private:
@@ -1468,12 +1468,12 @@ SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size)
   return slot.sym;
 }

-Symbol *
-SpillCodeInserter::offsetSlot(Symbol *base, const LValue *lval)
+Value *
+SpillCodeInserter::offsetSlot(Value *base, const LValue *lval)
 {
-   if (!base || !lval->compound || (lval->compMask & 0x1))
+   if (!lval->compound || (lval->compMask & 0x1))
      return base;
-   Symbol *slot = cloneShallow(func, base);
+   Value *slot = cloneShallow(func, base);

   slot->reg.data.offset += (ffs(lval->compMask) - 1) * lval->reg.size;
   slot->reg.size = lval->reg.size;
@@ -1486,7 +1486,7 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
 {
   const DataType ty = typeOfSize(lval->reg.size);

-   slot = offsetSlot(slot->asSym(), lval);
+   slot = offsetSlot(slot, lval);

   Instruction *st;
   if (slot->reg.file == FILE_MEMORY_LOCAL) {
@@ -1507,7 +1507,7 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
 {
   const DataType ty = typeOfSize(lval->reg.size);

-   slot = offsetSlot(slot->asSym(), lval);
+   slot = offsetSlot(slot, lval);
   lval = cloneShallow(func, lval);

   Instruction *ld;
@@ -1914,7 +1914,7 @@ RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex)
      s = tex->srcCount(0xff);
      n = 0;
   } else {
-      s = tex->tex.target.getArgCount();
+      s = tex->tex.target.getArgCount() - tex->tex.target.isMS();
      if (!tex->tex.target.isArray() &&
          (tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0))
         ++s;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -329,6 +329,8 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
         return false;
      if (sf == FILE_IMMEDIATE)
         return false;
+      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH && sf == FILE_MEMORY_CONST)
+         return false;
      ldSize = 2;
   } else {
      ldSize = typeSizeof(ld->dType);
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.c
@@ -371,10 +371,10 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_profile profile)
      int size, oclass;
      if (chipset < 0xc0)
         oclass = 0x85b1;
-      else if (vp5)
-         oclass = 0x95b1;
-      else
+      else if (chipset < 0xe0)
         oclass = 0x90b1;
+      else
+         oclass = 0x95b1;

      if (chipset < 0xc0) {
         data = &nv04_data;
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -302,10 +302,16 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
 {
   struct nv30_screen *screen = nv30_screen(pscreen);

-   if (screen->base.fence.current &&
-       screen->base.fence.current->state >= NOUVEAU_FENCE_STATE_EMITTED) {
-      nouveau_fence_wait(screen->base.fence.current);
-      nouveau_fence_ref (NULL, &screen->base.fence.current);
+   if (screen->base.fence.current) {
+      struct nouveau_fence *current = NULL;
+
+      /* nouveau_fence_wait will create a new current fence, so wait on the
+       * _current_ one, and remove both.
+       */
+      nouveau_fence_ref(screen->base.fence.current, &current);
+      nouveau_fence_wait(current);
+      nouveau_fence_ref(NULL, &current);
+      nouveau_fence_ref(NULL, &screen->base.fence.current);
   }

   nouveau_object_del(&screen->query);
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -77,13 +77,13 @@
 /* 8 user clip planes, at 4 32-bit floats each */
 #define NV50_CB_AUX_UCP_OFFSET    0x0000
 #define NV50_CB_AUX_UCP_SIZE      (8 * 4 * 4)
-/* 256 textures, each with ms_x, ms_y u32 pairs */
+/* 16 textures * 3 shaders, each with ms_x, ms_y u32 pairs */
 #define NV50_CB_AUX_TEX_MS_OFFSET 0x0080
-#define NV50_CB_AUX_TEX_MS_SIZE   (256 * 2 * 4)
+#define NV50_CB_AUX_TEX_MS_SIZE   (16 * 3 * 2 * 4)
 /* For each MS level (4), 8 sets of 32-bit integer pairs sample offsets */
-#define NV50_CB_AUX_MS_OFFSET     0x880
+#define NV50_CB_AUX_MS_OFFSET     0x200
 #define NV50_CB_AUX_MS_SIZE       (4 * 8 * 4 * 2)
-/* next spot: 0x980 */
+/* next spot: 0x300 */
 /* 4 32-bit floats for the vertex runout, put at the end */
 #define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10)

@@ -171,6 +171,8 @@ struct nv50_context {

   boolean vbo_push_hint;

+   uint32_t rt_array_mode;
+
   struct pipe_query *cond_query;
   boolean cond_cond;
   uint cond_mode;
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -290,8 +290,15 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
   struct nv50_screen *screen = nv50_screen(pscreen);

   if (screen->base.fence.current) {
-      nouveau_fence_wait(screen->base.fence.current);
-      nouveau_fence_ref (NULL, &screen->base.fence.current);
+      struct nouveau_fence *current = NULL;
+
+      /* nouveau_fence_wait will create a new current fence, so wait on the
+       * _current_ one, and remove both.
+       */
+      nouveau_fence_ref(screen->base.fence.current, &current);
+      nouveau_fence_wait(current);
+      nouveau_fence_ref(NULL, &current);
+      nouveau_fence_ref(NULL, &screen->base.fence.current);
   }
   if (screen->base.pushbuf)
      screen->base.pushbuf->user_priv = NULL;
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -556,11 +556,12 @@ nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
 {
   unsigned s, i;

-   for (s = 0; s < 3; ++s)
+   for (s = 0; s < 3; ++s) {
      assert(nv50_context(pipe)->num_samplers[s] <= PIPE_MAX_SAMPLERS);
      for (i = 0; i < nv50_context(pipe)->num_samplers[s]; ++i)
         if (nv50_context(pipe)->samplers[s][i] == hwcso)
            nv50_context(pipe)->samplers[s][i] = NULL;
+   }

   nv50_screen_tsc_free(nv50_context(pipe)->screen, nv50_tsc_entry(hwcso));

--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -65,6 +65,7 @@ nv50_validate_fb(struct nv50_context *nv50)
         PUSH_DATA (push, sf->height);
         BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
         PUSH_DATA (push, array_mode | array_size);
+         nv50->rt_array_mode = array_mode | array_size;
      } else {
         PUSH_DATA (push, 0);
         PUSH_DATA (push, 0);
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -295,7 +295,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
   PUSH_DATA (push, bo->offset + sf->offset);
   PUSH_DATA (push, nv50_format_table[dst->format].rt);
   PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
-   PUSH_DATA (push, 0);
+   PUSH_DATA (push, mt->layer_stride >> 2);
   BEGIN_NV04(push, NV50_3D(RT_HORIZ(0)), 2);
   if (nouveau_bo_memtype(bo))
      PUSH_DATA(push, sf->width);
@@ -303,7 +303,10 @@ nv50_clear_render_target(struct pipe_context *pipe,
      PUSH_DATA(push, NV50_3D_RT_HORIZ_LINEAR | mt->level[0].pitch);
   PUSH_DATA (push, sf->height);
   BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
-   PUSH_DATA (push, 1);
+   if (mt->layout_3d)
+      PUSH_DATA(push, NV50_3D_RT_ARRAY_MODE_MODE_3D | 512);
+   else
+      PUSH_DATA(push, 512);

   if (!nouveau_bo_memtype(bo)) {
      BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
@@ -366,7 +369,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
   PUSH_DATA (push, bo->offset + sf->offset);
   PUSH_DATA (push, nv50_format_table[dst->format].rt);
   PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
-   PUSH_DATA (push, 0);
+   PUSH_DATA (push, mt->layer_stride >> 2);
   BEGIN_NV04(push, NV50_3D(ZETA_ENABLE), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(ZETA_HORIZ), 3);
@@ -374,6 +377,9 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
   PUSH_DATA (push, sf->height);
   PUSH_DATA (push, (1 << 16) | 1);

+   BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+   PUSH_DATA (push, 512);
+
   BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(0)), 2);
   PUSH_DATA (push, (width << 16) | dstx);
   PUSH_DATA (push, (height << 16) | dsty);
@@ -402,6 +408,11 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
   if (!nv50_state_validate(nv50, NV50_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2)))
      return;

+   /* We have to clear ALL of the layers, not up to the min number of layers
+    * of any attachment. */
+   BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+   PUSH_DATA (push, (nv50->rt_array_mode & NV50_3D_RT_ARRAY_MODE_MODE_3D) | 512);
+
   if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
      BEGIN_NV04(push, NV50_3D(CLEAR_COLOR(0)), 4);
      PUSH_DATAf(push, color->f[0]);
@@ -459,6 +470,10 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
                    (j << NV50_3D_CLEAR_BUFFERS_LAYER__SHIFT));
      }
   }
+
+   /* restore the array mode */
+   BEGIN_NV04(push, NV50_3D(RT_ARRAY_MODE), 1);
+   PUSH_DATA (push, nv50->rt_array_mode);
 }


@@ -962,6 +977,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
   float x0, x1, y0, y1, z;
   float dz;
   float x_range, y_range;
+   float tri_x, tri_y;

   blit->mode = nv50_blit_select_mode(info);
   blit->color_mask = nv50_blit_derive_color_mask(info);
@@ -981,11 +997,14 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
   x_range = (float)info->src.box.width / (float)info->dst.box.width;
   y_range = (float)info->src.box.height / (float)info->dst.box.height;

+   tri_x = 16384 << nv50_miptree(dst)->ms_x;
+   tri_y = 16384 << nv50_miptree(dst)->ms_y;
+
   x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
   y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;

-   x1 = x0 + 16384.0f * x_range;
-   y1 = y0 + 16384.0f * y_range;
+   x1 = x0 + tri_x * x_range;
+   y1 = y0 + tri_y * y_range;

   x0 *= (float)(1 << nv50_miptree(src)->ms_x);
   x1 *= (float)(1 << nv50_miptree(src)->ms_x);
@@ -1054,7 +1073,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
      PUSH_DATAf(push, y0);
      PUSH_DATAf(push, z);
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
-      PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x);
+      PUSH_DATAf(push, tri_x);
      PUSH_DATAf(push, 0.0f);
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_3F_X(1)), 3);
      PUSH_DATAf(push, x0);
@@ -1062,7 +1081,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
      PUSH_DATAf(push, z);
      BEGIN_NV04(push, NV50_3D(VTX_ATTR_2F_X(0)), 2);
      PUSH_DATAf(push, 0.0f);
-      PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y);
+      PUSH_DATAf(push, tri_y);
      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (push, 0);
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -286,7 +286,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
   }
   if (nv50->num_textures[s]) {
      BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
-      PUSH_DATA (push, (NV50_CB_AUX_TEX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);
+      PUSH_DATA (push, ((NV50_CB_AUX_TEX_MS_OFFSET + 16 * s * 2 * 4) << (8 - 2)) | NV50_CB_AUX);
      BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
      for (i = 0; i < nv50->num_textures[s]; i++) {
         struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -278,7 +278,7 @@ nv50_miptree_transfer_map(struct pipe_context *pctx,

   if (util_format_is_plain(res->format)) {
      tx->nblocksx = box->width << mt->ms_x;
-      tx->nblocksy = box->height << mt->ms_x;
+      tx->nblocksy = box->height << mt->ms_y;
   } else {
      tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
      tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -334,7 +334,14 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
   struct nvc0_screen *screen = nvc0_screen(pscreen);

   if (screen->base.fence.current) {
-      nouveau_fence_wait(screen->base.fence.current);
+      struct nouveau_fence *current = NULL;
+
+      /* nouveau_fence_wait will create a new current fence, so wait on the
+       * _current_ one, and remove both.
+       */
+      nouveau_fence_ref(screen->base.fence.current, &current);
+      nouveau_fence_wait(current);
+      nouveau_fence_ref(NULL, &current);
      nouveau_fence_ref(NULL, &screen->base.fence.current);
   }
   if (screen->base.pushbuf)
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -130,7 +130,7 @@ static boolean r300_cbzb_clear_allowed(struct r300_context *r300,
        (struct pipe_framebuffer_state*)r300->fb_state.state;

    /* Only color clear allowed, and only one colorbuffer. */
-    if ((clear_buffers & ~PIPE_CLEAR_COLOR) != 0 || fb->nr_cbufs != 1)
+    if ((clear_buffers & ~PIPE_CLEAR_COLOR) != 0 || fb->nr_cbufs != 1 || !fb->cbufs[0])
        return FALSE;

    return r300_surface(fb->cbufs[0])->cbzb_allowed;
@@ -313,7 +313,7 @@ static void r300_clear(struct pipe_context* pipe,
    /* Use fast color clear for an AA colorbuffer.
     * The CMASK is shared between all colorbuffers, so we use it
     * if there is only one colorbuffer bound. */
-    if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 &&
+    if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs == 1 && fb->cbufs[0] &&
        r300_resource(fb->cbufs[0]->texture)->tex.cmask_dwords) {
        /* Try to obtain the access to the CMASK if we don't have one. */
        if (!r300->cmask_access) {
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -688,6 +688,20 @@ static INLINE void r300_mark_atom_dirty(struct r300_context *r300,
    }
 }

+static INLINE struct pipe_surface *
+r300_get_nonnull_cb(struct pipe_framebuffer_state *fb, unsigned i)
+{
+    if (fb->cbufs[i])
+        return fb->cbufs[i];
+
+    /* The i-th framebuffer is NULL, return any non-NULL one. */
+    for (i = 0; i < fb->nr_cbufs; i++)
+        if (fb->cbufs[i])
+            return fb->cbufs[i];
+
+    return NULL;
+}
+
 struct pipe_context* r300_create_context(struct pipe_screen* screen,
                                         void *priv);

--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -42,15 +42,18 @@ void r300_emit_blend_state(struct r300_context* r300,
    struct r300_blend_state* blend = (struct r300_blend_state*)state;
    struct pipe_framebuffer_state* fb =
        (struct pipe_framebuffer_state*)r300->fb_state.state;
+    struct pipe_surface *cb;
    CS_LOCALS(r300);

-    if (fb->nr_cbufs) {
-        if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) {
+    cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL;
+
+    if (cb) {
+        if (cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT) {
            WRITE_CS_TABLE(blend->cb_noclamp, size);
-        } else if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT) {
+        } else if (cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT) {
            WRITE_CS_TABLE(blend->cb_noclamp_noalpha, size);
        } else {
-            unsigned swz = r300_surface(fb->cbufs[0])->colormask_swizzle;
+            unsigned swz = r300_surface(cb)->colormask_swizzle;
            WRITE_CS_TABLE(blend->cb_clamp[swz], size);
        }
    } else {
@@ -88,9 +91,11 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
    /* Choose the alpha ref value between 8-bit (FG_ALPHA_FUNC.AM_VAL) and
     * 16-bit (FG_ALPHA_VALUE). */
    if (is_r500 && (alpha_func & R300_FG_ALPHA_FUNC_ENABLE)) {
-        if (fb->nr_cbufs &&
-            (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
-             fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16X16_FLOAT)) {
+        struct pipe_surface *cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL;
+
+        if (cb &&
+            (cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
+             cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT)) {
            alpha_func |= R500_FG_ALPHA_FUNC_FP16_ENABLE;
        } else {
            alpha_func |= R500_FG_ALPHA_FUNC_8BIT;
@@ -419,7 +424,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)

    /* Set up colorbuffers. */
    for (i = 0; i < fb->nr_cbufs; i++) {
-        surf = r300_surface(fb->cbufs[i]);
+        surf = r300_surface(r300_get_nonnull_cb(fb, i));

        OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset);
        OUT_CS_RELOC(surf);
@@ -600,7 +605,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300,
     * (must be written after unpipelined regs) */
    OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
    for (i = 0; i < num_cbufs; i++) {
-        OUT_CS(r300_surface(fb->cbufs[i])->format);
+        OUT_CS(r300_surface(r300_get_nonnull_cb(fb, i))->format);
    }
    for (; i < 1; i++) {
        OUT_CS(R300_US_OUT_FMT_C4_8 |
@@ -1310,6 +1315,8 @@ validate:
    if (r300->fb_state.dirty) {
        /* Color buffers... */
        for (i = 0; i < fb->nr_cbufs; i++) {
+            if (!fb->cbufs[i])
+                continue;
            tex = r300_resource(fb->cbufs[i]->texture);
            assert(tex && tex->buf && "cbuf is marked, but NULL!");
            r300->rws->cs_add_reloc(r300->cs, tex->cs_buf,
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -579,16 +579,17 @@ static void r300_set_blend_color(struct pipe_context* pipe,
    struct r300_blend_color_state *state =
        (struct r300_blend_color_state*)r300->blend_color_state.state;
    struct pipe_blend_color c;
-    enum pipe_format format = fb->nr_cbufs ? fb->cbufs[0]->format : 0;
+    struct pipe_surface *cb;
    float tmp;
    CB_LOCALS;

    state->state = *color; /* Save it, so that we can reuse it in set_fb_state */
    c = *color;
+    cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL;

    /* The blend color is dependent on the colorbuffer format. */
-    if (fb->nr_cbufs) {
-        switch (format) {
+    if (cb) {
+        switch (cb->format) {
        case PIPE_FORMAT_R8_UNORM:
        case PIPE_FORMAT_L8_UNORM:
        case PIPE_FORMAT_I8_UNORM:
@@ -623,7 +624,7 @@ static void r300_set_blend_color(struct pipe_context* pipe,
        BEGIN_CB(state->cb, 3);
        OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);

-        switch (format) {
+        switch (cb ? cb->format : 0) {
        case PIPE_FORMAT_R16G16B16A16_FLOAT:
        case PIPE_FORMAT_R16G16B16X16_FLOAT:
            OUT_CB(util_float_to_half(c.color[2]) |
@@ -858,6 +859,9 @@ static void r300_fb_set_tiling_flags(struct r300_context *r300,

    /* Set tiling flags for new surfaces. */
    for (i = 0; i < state->nr_cbufs; i++) {
+        if (!state->cbufs[i])
+            continue;
+
        r300_tex_set_tiling_flags(r300,
                                  r300_resource(state->cbufs[i]->texture),
                                  state->cbufs[i]->u.tex.level);
@@ -950,7 +954,8 @@ static unsigned r300_get_num_samples(struct r300_context *r300)
    num_samples = 6;

    for (i = 0; i < fb->nr_cbufs; i++)
-        num_samples = MIN2(num_samples, fb->cbufs[i]->texture->nr_samples);
+        if (fb->cbufs[i])
+            num_samples = MIN2(num_samples, fb->cbufs[i]->texture->nr_samples);

    if (fb->zsbuf)
        num_samples = MIN2(num_samples, fb->zsbuf->texture->nr_samples);
@@ -967,7 +972,7 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
 {
    struct r300_context* r300 = r300_context(pipe);
    struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state;
-    struct pipe_framebuffer_state *old_state = r300->fb_state.state;
+    struct pipe_framebuffer_state *current_state = r300->fb_state.state;
    unsigned max_width, max_height, i;
    uint32_t zbuffer_bpp = 0;
    boolean unlock_zbuffer = FALSE;
@@ -986,17 +991,17 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
        return;
    }

-    if (old_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) {
+    if (current_state->zsbuf && r300->zmask_in_use && !r300->locked_zbuffer) {
        /* There is a zmask in use, what are we gonna do? */
        if (state->zsbuf) {
-            if (!pipe_surface_equal(old_state->zsbuf, state->zsbuf)) {
+            if (!pipe_surface_equal(current_state->zsbuf, state->zsbuf)) {
                /* Decompress the currently bound zbuffer before we bind another one. */
                r300_decompress_zmask(r300);
                r300->hiz_in_use = FALSE;
            }
        } else {
            /* We don't bind another zbuffer, so lock the current one. */
-            pipe_surface_reference(&r300->locked_zbuffer, old_state->zsbuf);
+            pipe_surface_reference(&r300->locked_zbuffer, current_state->zsbuf);
        }
    } else if (r300->locked_zbuffer) {
        /* We have a locked zbuffer now, what are we gonna do? */
@@ -1014,9 +1019,20 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
    }
    assert(state->zsbuf || (r300->locked_zbuffer && !unlock_zbuffer) || !r300->zmask_in_use);

+    /* If zsbuf is set from NULL to non-NULL or vice versa.. */
+    if (!!current_state->zsbuf != !!state->zsbuf) {
+        r300_mark_atom_dirty(r300, &r300->dsa_state);
+    }
+
+    util_copy_framebuffer_state(r300->fb_state.state, state);
+
+    /* Remove trailing NULL colorbuffers. */
+    while (current_state->nr_cbufs && !current_state->cbufs[current_state->nr_cbufs-1])
+        current_state->nr_cbufs--;
+
    /* Set whether CMASK can be used. */
    r300->cmask_in_use =
-        state->nr_cbufs == 1 &&
+        state->nr_cbufs == 1 && state->cbufs[0] &&
        r300->screen->cmask_resource == state->cbufs[0]->texture;

    /* Need to reset clamping or colormask. */
@@ -1025,11 +1041,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
    /* Re-swizzle the blend color. */
    r300_set_blend_color(pipe, &((struct r300_blend_color_state*)r300->blend_color_state.state)->state);

-    /* If zsbuf is set from NULL to non-NULL or vice versa.. */
-    if (!!old_state->zsbuf != !!state->zsbuf) {
-        r300_mark_atom_dirty(r300, &r300->dsa_state);
-    }
-
    if (r300->screen->info.drm_minor < 12) {
       /* The tiling flags are dependent on the surface miplevel, unfortunately.
        * This workarounds a bad design decision in old kernels which were
@@ -1037,8 +1048,6 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
        r300_fb_set_tiling_flags(r300, state);
    }

-    util_copy_framebuffer_state(r300->fb_state.state, state);
-
    if (unlock_zbuffer) {
        pipe_surface_reference(&r300->locked_zbuffer, NULL);
    }
@@ -1089,7 +1098,8 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
    if (DBG_ON(r300, DBG_FB)) {
        fprintf(stderr, "r300: set_framebuffer_state:\n");
        for (i = 0; i < state->nr_cbufs; i++) {
-            r300_print_fb_surf_info(state->cbufs[i], i, "CB");
+            if (state->cbufs[i])
+                r300_print_fb_surf_info(state->cbufs[i], i, "CB");
        }
        if (state->zsbuf) {
            r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -927,7 +927,8 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
 		S_028810_PS_UCP_MODE(3) |
 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
-		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
+		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1) |
+		S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
 	rs->multisample_enable = state->multisample;

 	/* offset */
@@ -996,7 +997,6 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
 						  state->fill_back != PIPE_POLYGON_MODE_FILL) |
 			       S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
 			       S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)));
-	r600_store_context_reg(&rs->buffer, R_028350_SX_MISC, S_028350_MULTIPASS(state->rasterizer_discard));
 	return rs;
 }

@@ -1097,7 +1097,8 @@ struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 				     struct pipe_resource *texture,
 				     const struct pipe_sampler_view *state,
-				     unsigned width0, unsigned height0)
+				     unsigned width0, unsigned height0,
+				     unsigned force_level)
 {
 	struct r600_screen *rscreen = (struct r600_screen*)ctx->screen;
 	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
@@ -1109,6 +1110,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	unsigned macro_aspect, tile_split, bankh, bankw, nbanks, fmask_bankh;
 	enum pipe_format pipe_format = state->format;
 	struct radeon_surface_level *surflevel;
+	unsigned base_level, first_level, last_level;
+	uint64_t va;

 	if (view == NULL)
 		return NULL;
@@ -1165,13 +1168,26 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,

 	endian = r600_colorformat_endian_swap(format);

+	base_level = 0;
+	first_level = state->u.tex.first_level;
+	last_level = state->u.tex.last_level;
 	width = width0;
 	height = height0;
 	depth = texture->depth0;
-	pitch = surflevel[0].nblk_x * util_format_get_blockwidth(pipe_format);
+
+	if (force_level) {
+		base_level = force_level;
+		first_level = 0;
+		last_level = 0;
+		width = u_minify(width, force_level);
+		height = u_minify(height, force_level);
+		depth = u_minify(depth, force_level);
+	}
+
+	pitch = surflevel[base_level].nblk_x * util_format_get_blockwidth(pipe_format);
 	non_disp_tiling = tmp->non_disp_tiling;

-	switch (surflevel[0].mode) {
+	switch (surflevel[base_level].mode) {
 	case RADEON_SURF_MODE_LINEAR_ALIGNED:
 		array_mode = V_028C70_ARRAY_LINEAR_ALIGNED;
 		break;
@@ -1210,6 +1226,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	} else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY)
 		depth = texture->array_size / 6;

+	va = r600_resource_va(ctx->screen, texture);
+
 	view->tex_resource = &tmp->resource;
 	view->tex_resource_words[0] = (S_030000_DIM(r600_tex_dim(texture->target, texture->nr_samples)) |
 				       S_030000_PITCH((pitch / 8) - 1) |
@@ -1221,7 +1239,7 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 	view->tex_resource_words[1] = (S_030004_TEX_HEIGHT(height - 1) |
 				       S_030004_TEX_DEPTH(depth - 1) |
 				       S_030004_ARRAY_MODE(array_mode));
-	view->tex_resource_words[2] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+	view->tex_resource_words[2] = (surflevel[base_level].offset + va) >> 8;

 	/* TEX_RESOURCE_WORD3.MIP_ADDRESS */
 	if (texture->nr_samples > 1 && rscreen->has_compressed_msaa_texturing) {
@@ -1231,12 +1249,12 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 			view->skip_mip_address_reloc = true;
 		} else {
 			/* FMASK should be in MIP_ADDRESS for multisample textures */
-			view->tex_resource_words[3] = (tmp->fmask.offset + r600_resource_va(ctx->screen, texture)) >> 8;
+			view->tex_resource_words[3] = (tmp->fmask.offset + va) >> 8;
 		}
-	} else if (state->u.tex.last_level && texture->nr_samples <= 1) {
-		view->tex_resource_words[3] = (surflevel[1].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+	} else if (last_level && texture->nr_samples <= 1) {
+		view->tex_resource_words[3] = (surflevel[1].offset + va) >> 8;
 	} else {
-		view->tex_resource_words[3] = (surflevel[0].offset + r600_resource_va(ctx->screen, texture)) >> 8;
+		view->tex_resource_words[3] = (surflevel[base_level].offset + va) >> 8;
 	}

 	view->tex_resource_words[4] = (word4 |
@@ -1255,8 +1273,8 @@ evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 		view->tex_resource_words[5] |= S_030014_LAST_LEVEL(log_samples);
 		view->tex_resource_words[6] |= S_030018_FMASK_BANK_HEIGHT(fmask_bankh);
 	} else {
-		view->tex_resource_words[4] |= S_030010_BASE_LEVEL(state->u.tex.first_level);
-		view->tex_resource_words[5] |= S_030014_LAST_LEVEL(state->u.tex.last_level);
+		view->tex_resource_words[4] |= S_030010_BASE_LEVEL(first_level);
+		view->tex_resource_words[5] |= S_030014_LAST_LEVEL(last_level);
 		/* aniso max 16 samples */
 		view->tex_resource_words[6] |= S_030018_MAX_ANISO(4);
 	}
@@ -1277,7 +1295,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx,
 			      const struct pipe_sampler_view *state)
 {
 	return evergreen_create_sampler_view_custom(ctx, tex, state,
-						    tex->width0, tex->height0);
+						    tex->width0, tex->height0, 0);
 }

 static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom)
@@ -1824,7 +1842,10 @@ static void evergreen_set_framebuffer_state(struct pipe_context *ctx,
 	}

 	log_samples = util_logbase2(rctx->framebuffer.nr_samples);
-	if (rctx->b.chip_class == CAYMAN && rctx->db_misc_state.log_samples != log_samples) {
+	/* This is for Cayman to program SAMPLE_RATE, and for RV770 to fix a hw bug. */
+	if ((rctx->b.chip_class == CAYMAN ||
+	     rctx->b.family == CHIP_RV770) &&
+	    rctx->db_misc_state.log_samples != log_samples) {
 		rctx->db_misc_state.log_samples = log_samples;
 		rctx->db_misc_state.atom.dirty = true;
 	}
@@ -2809,7 +2830,9 @@ void cayman_init_common_regs(struct r600_command_buffer *cb,

 	r600_store_context_reg(cb, R_028A4C_PA_SC_MODE_CNTL_1, 0);

-	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
+	r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2);
+	r600_store_value(cb, 0);
+	r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf));

 	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
 }
@@ -3085,7 +3108,9 @@ void evergreen_init_common_regs(struct r600_command_buffer *cb,
 	/* The cs checker requires this register to be set. */
 	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);

-	r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
+	r600_store_context_reg_seq(cb, R_028350_SX_MISC, 2);
+	r600_store_value(cb, 0);
+	r600_store_value(cb, S_028354_SURFACE_SYNC_MASK(0xf));

 	return;
 }
@@ -3603,30 +3628,6 @@ void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader
 	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
 }

-static unsigned r600_conv_prim_to_gs_out(unsigned mode)
-{
-	static const int prim_conv[] = {
-		V_028A6C_OUTPRIM_TYPE_POINTLIST,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP
-	};
-	assert(mode < Elements(prim_conv));
-
-	return prim_conv[mode];
-}
-
 void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
@@ -3727,6 +3728,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader
 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->clip_dist_write & 0xF0) != 0) |
 		S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) |
 		S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) |
+		S_02881C_USE_VTX_EDGE_FLAG(rshader->vs_out_edgeflag) |
 		S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer);
 }

--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -582,6 +582,9 @@
 #define   S_028810_VTX_KILL_OR(x)                      (((x) & 0x1) << 21)
 #define   G_028810_VTX_KILL_OR(x)                      (((x) >> 21) & 0x1)
 #define   C_028810_VTX_KILL_OR                         0xFFDFFFFF
+#define   S_028810_DX_RASTERIZATION_KILL(x)            (((x) & 0x1) << 22)
+#define   G_028810_DX_RASTERIZATION_KILL(x)            (((x) >> 22) & 0x1)
+#define   C_028810_DX_RASTERIZATION_KILL               0xFFBFFFFF
 #define   S_028810_DX_LINEAR_ATTR_CLIP_ENA(x)          (((x) & 0x1) << 24)
 #define   G_028810_DX_LINEAR_ATTR_CLIP_ENA(x)          (((x) >> 24) & 0x1)
 #define   C_028810_DX_LINEAR_ATTR_CLIP_ENA             0xFEFFFFFF
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -599,6 +599,12 @@ static void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst
 	} else {
 		util_resource_copy_region(ctx, dst, 0, dstx, 0, 0, src, 0, src_box);
 	}
+
+	/* The index buffer (VGT) doesn't seem to see the result of the copying.
+	 * Can we somehow flush the index buffer cache? Starting a new IB seems
+	 * to do the trick. */
+	if (rctx->b.chip_class <= R700)
+		rctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC);
 }

 /**
@@ -679,6 +685,7 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 	struct pipe_surface *dst_view, dst_templ;
 	struct pipe_sampler_view src_templ, *src_view;
 	unsigned dst_width, dst_height, src_width0, src_height0, src_widthFL, src_heightFL;
+	unsigned src_force_level = 0;
 	struct pipe_box sbox, dstbox;

 	/* Handle buffers first. */
@@ -737,6 +744,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 		sbox.height = util_format_get_nblocksy(src->format, src_box->height);
 		sbox.depth = src_box->depth;
 		src_box = &sbox;
+
+		src_force_level = src_level;
 	} else if (!util_blitter_is_copy_supported(rctx->blitter, dst, src)) {
 		if (util_format_is_subsampled_2x1_32bpp(src->format)) {

@@ -789,7 +798,8 @@ static void r600_resource_copy_region(struct pipe_context *ctx,

 	if (rctx->b.chip_class >= EVERGREEN) {
 		src_view = evergreen_create_sampler_view_custom(ctx, src, &src_templ,
-								src_width0, src_height0);
+								src_width0, src_height0,
+								src_force_level);
 	} else {
 		src_view = r600_create_sampler_view_custom(ctx, src, &src_templ,
 							   src_widthFL, src_heightFL);
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -81,7 +81,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
 	}

 	/* SX_MISC */
-	if (ctx->b.chip_class <= R700) {
+	if (ctx->b.chip_class == R600) {
 		num_dw += 3;
 	}

@@ -210,6 +210,15 @@ void r600_flush_emit(struct r600_context *rctx)
 				S_0085F0_SMX_ACTION_ENA(1);
 	}

+	/* Workaround for buggy flushing on some R6xx chipsets. */
+	if (rctx->b.flags & R600_CONTEXT_FLUSH_AND_INV &&
+	    (rctx->b.family == CHIP_RV670 ||
+	     rctx->b.family == CHIP_RS780 ||
+	     rctx->b.family == CHIP_RS880)) {
+		cp_coher_cntl |=  S_0085F0_CB1_DEST_BASE_ENA(1) |
+				  S_0085F0_DEST_BASE_0_ENA(1);
+	}
+
 	if (cp_coher_cntl) {
 		cs->buf[cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
 		cs->buf[cs->cdw++] = cp_coher_cntl;   /* CP_COHER_CNTL */
@@ -260,7 +269,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 	r600_flush_emit(ctx);

 	/* old kernels and userspace don't set SX_MISC, so we must reset it to 0 here */
-	if (ctx->b.chip_class <= R700) {
+	if (ctx->b.chip_class == R600) {
 		r600_write_context_reg(cs, R_028350_SX_MISC, 0);
 	}

--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -44,7 +44,7 @@
 static const struct debug_named_value r600_debug_options[] = {
 	/* features */
 #if defined(R600_USE_LLVM)
-	{ "nollvm", DBG_NO_LLVM, "Disable the LLVM shader compiler" },
+	{ "llvm", DBG_LLVM, "Enable the LLVM shader compiler" },
 #endif
 	{ "nocpdma", DBG_NO_CP_DMA, "Disable CP DMA" },
 	{ "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" },
@@ -347,7 +347,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
 	case PIPE_CAP_USER_INDEX_BUFFERS:
 	case PIPE_CAP_USER_CONSTANT_BUFFERS:
-	case PIPE_CAP_COMPUTE:
 	case PIPE_CAP_START_INSTANCE:
 	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
 	case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
@@ -356,6 +355,9 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 	case PIPE_CAP_TEXTURE_MULTISAMPLE:
 		return 1;

+	case PIPE_CAP_COMPUTE:
+		return rscreen->b.chip_class > R700;
+
 	case PIPE_CAP_TGSI_TEXCOORD:
 		return 0;

@@ -417,15 +419,17 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)

 	/* Texturing. */
 	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
 	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
 		if (family >= CHIP_CEDAR)
 			return 15;
 		else
 			return 14;
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+		/* textures support 8192, but layered rendering supports 2048 */
+		return 12;
 	case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-		return rscreen->b.info.drm_minor >= 9 ?
-			(family >= CHIP_CEDAR ? 16384 : 8192) : 0;
+		/* textures support 8192, but layered rendering supports 2048 */
+		return rscreen->b.info.drm_minor >= 9 ? 2048 : 0;
 	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
 		return 48;

@@ -587,8 +591,8 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 		rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
 	if (debug_get_bool_option("R600_HYPERZ", FALSE))
 		rscreen->b.debug_flags |= DBG_HYPERZ;
-	if (!debug_get_bool_option("R600_LLVM", TRUE))
-		rscreen->b.debug_flags |= DBG_NO_LLVM;
+	if (debug_get_bool_option("R600_LLVM", FALSE))
+		rscreen->b.debug_flags |= DBG_LLVM;

 	if (rscreen->b.family == CHIP_UNKNOWN) {
 		fprintf(stderr, "r600: Unknown chipset 0x%04X\n", rscreen->b.info.pci_id);
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -42,7 +42,7 @@

 /* the number of CS dwords for flushing and drawing */
 #define R600_MAX_FLUSH_CS_DWORDS	16
-#define R600_MAX_DRAW_CS_DWORDS		34
+#define R600_MAX_DRAW_CS_DWORDS		37
 #define R600_TRACE_CS_DWORDS		7

 #define R600_MAX_USER_CONST_BUFFERS 13
@@ -195,7 +195,7 @@ struct r600_gs_rings_state {

 /* This must start from 16. */
 /* features */
-#define DBG_NO_LLVM		(1 << 17)
+#define DBG_LLVM		(1 << 17)
 #define DBG_NO_CP_DMA		(1 << 18)
 #define DBG_NO_ASYNC_DMA	(1 << 19)
 /* shader backend */
@@ -235,6 +235,7 @@ struct r600_rasterizer_state {
 	unsigned                        clip_plane_enable;
 	unsigned			pa_sc_line_stipple;
 	unsigned			pa_cl_clip_cntl;
+	unsigned			pa_su_sc_mode_cntl;
 	float				offset_units;
 	float				offset_scale;
 	bool				offset_enable;
@@ -511,7 +512,8 @@ struct pipe_sampler_view *
 evergreen_create_sampler_view_custom(struct pipe_context *ctx,
 				     struct pipe_resource *texture,
 				     const struct pipe_sampler_view *state,
-				     unsigned width0, unsigned height0);
+				     unsigned width0, unsigned height0,
+				     unsigned force_level);
 void evergreen_init_common_regs(struct r600_command_buffer *cb,
 				enum chip_class ctx_chip_class,
 				enum radeon_family ctx_family,
@@ -825,15 +827,6 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
 }
 #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))

-static inline unsigned r600_tex_aniso_filter(unsigned filter)
-{
-	if (filter <= 1)   return 0;
-	if (filter <= 2)   return 1;
-	if (filter <= 4)   return 2;
-	if (filter <= 8)   return 3;
-	 /* else */        return 4;
-}
-
 /* 12.4 fixed-point */
 static INLINE unsigned r600_pack_float_12p4(float x)
 {
@@ -841,4 +834,32 @@ static INLINE unsigned r600_pack_float_12p4(float x)
 	       x >= 4096 ? 0xffff : x * 16;
 }

+#define     V_028A6C_OUTPRIM_TYPE_POINTLIST            0
+#define     V_028A6C_OUTPRIM_TYPE_LINESTRIP            1
+#define     V_028A6C_OUTPRIM_TYPE_TRISTRIP             2
+
+static INLINE unsigned r600_conv_prim_to_gs_out(unsigned mode)
+{
+	static const int prim_conv[] = {
+		V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		V_028A6C_OUTPRIM_TYPE_TRISTRIP
+	};
+	assert(mode < Elements(prim_conv));
+
+	return prim_conv[mode];
+}
+
 #endif
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -155,7 +155,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 	r = r600_shader_from_tgsi(rctx, shader, key);
 	if (r) {
 		R600_ERR("translation from TGSI failed !\n");
-		return r;
+		goto error;
 	}

 	/* disable SB for geom shaders - it can't handle the CF_EMIT instructions */
@@ -169,7 +169,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		r = r600_bytecode_build(&shader->shader.bc);
 		if (r) {
 			R600_ERR("building bytecode failed !\n");
-			return r;
+			goto error;
 		}
 	}

@@ -182,7 +182,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		                             dump, use_sb);
 		if (r) {
 			R600_ERR("r600_sb_bytecode_process failed !\n");
-			return r;
+			goto error;
 		}
 	}

@@ -192,16 +192,16 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
 						     &shader->gs_copy_shader->shader, dump, 0);
 			if (r)
-				return r;
+				goto error;
 		}

 		if ((r = store_shader(ctx, shader->gs_copy_shader)))
-			return r;
+			goto error;
 	}

 	/* Store the shader in a buffer. */
 	if ((r = store_shader(ctx, shader)))
-		return r;
+		goto error;

 	/* Build state. */
 	switch (shader->shader.processor_type) {
@@ -235,9 +235,14 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		}
 		break;
 	default:
-		return -EINVAL;
+		r = -EINVAL;
+		goto error;
 	}
 	return 0;
+
+error:
+	r600_pipe_shader_destroy(ctx, shader);
+	return r;
 }

 void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
@@ -286,6 +291,7 @@ struct r600_shader_ctx {
 	int					colors_used;
 	boolean                 clip_vertex_write;
 	unsigned                cv_output;
+	unsigned		edgeflag_output;
 	int					fragcoord_input;
 	int					native_integers;
 	int					next_ring_offset;
@@ -490,9 +496,10 @@ static int r600_spi_sid(struct r600_shader_io * io)
 	 * semantic indices, so we'll use 0 for them.
 	 */
 	if (name == TGSI_SEMANTIC_POSITION ||
-		name == TGSI_SEMANTIC_PSIZE ||
-		name == TGSI_SEMANTIC_LAYER ||
-		name == TGSI_SEMANTIC_FACE)
+	    name == TGSI_SEMANTIC_PSIZE ||
+	    name == TGSI_SEMANTIC_EDGEFLAG ||
+	    name == TGSI_SEMANTIC_LAYER ||
+	    name == TGSI_SEMANTIC_FACE)
 		index = 0;
 	else {
 		if (name == TGSI_SEMANTIC_GENERIC) {
@@ -623,6 +630,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
 				ctx->shader->vs_out_misc_write = 1;
 				ctx->shader->vs_out_point_size = 1;
 				break;
+			case TGSI_SEMANTIC_EDGEFLAG:
+				ctx->shader->vs_out_misc_write = 1;
+				ctx->shader->vs_out_edgeflag = 1;
+				ctx->edgeflag_output = i;
+				break;
 			case TGSI_SEMANTIC_LAYER:
 				ctx->shader->vs_out_misc_write = 1;
 				ctx->shader->vs_out_layer = 1;
@@ -1157,6 +1169,35 @@ out_err:
 	return r;
 }

+static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
+{
+	struct r600_bytecode_alu alu;
+	unsigned reg;
+
+	if (!ctx->shader->vs_out_edgeflag)
+		return;
+
+	reg = ctx->shader->output[ctx->edgeflag_output].gpr;
+
+	/* clamp(x, 0, 1) */
+	memset(&alu, 0, sizeof(alu));
+	alu.op = ALU_OP1_MOV;
+	alu.src[0].sel = reg;
+	alu.dst.sel = reg;
+	alu.dst.write = 1;
+	alu.dst.clamp = 1;
+	alu.last = 1;
+	r600_bytecode_add_alu(ctx->bc, &alu);
+
+	memset(&alu, 0, sizeof(alu));
+	alu.op = ALU_OP1_FLT_TO_INT;
+	alu.src[0].sel = reg;
+	alu.dst.sel = reg;
+	alu.dst.write = 1;
+	alu.last = 1;
+	r600_bytecode_add_alu(ctx->bc, &alu);
+}
+
 static int generate_gs_copy_shader(struct r600_context *rctx,
 				   struct r600_pipe_shader *gs,
 				   struct pipe_stream_output_info *so)
@@ -1480,7 +1521,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	bool pos_emitted = false;

 #ifdef R600_USE_LLVM
-	use_llvm = !(rscreen->b.debug_flags & DBG_NO_LLVM);
+	use_llvm = rscreen->b.debug_flags & DBG_LLVM;
 #endif
 	ctx.bc = &shader->bc;
 	ctx.shader = shader;
@@ -1901,6 +1942,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	    so.num_outputs && !use_llvm)
 		emit_streamout(&ctx, &so);

+	convert_edgeflag_to_int(&ctx);
+
 	if (ring_outputs) {
 		if (key.vs_as_es)
 			emit_gs_ring_writes(&ctx, FALSE);
@@ -1936,6 +1979,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
 					pos_emitted = true;
 					break;
+				case TGSI_SEMANTIC_EDGEFLAG:
+					output[j].array_base = 61;
+					output[j].swizzle_x = 7;
+					output[j].swizzle_y = 0;
+					output[j].swizzle_z = 7;
+					output[j].swizzle_w = 7;
+					output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+					pos_emitted = true;
+					break;
 				case TGSI_SEMANTIC_LAYER:
 					output[j].array_base = 61;
 					output[j].swizzle_x = 7;
@@ -4338,11 +4390,16 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
 				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);

+	bool txf_add_offsets = inst->Texture.NumOffsets &&
+			     inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
+			     inst->Texture.Texture != TGSI_TEXTURE_BUFFER;
+
 	/* Texture fetch instructions can only use gprs as source.
 	 * Also they cannot negate the source or take the absolute value */
 	const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ &&
                                              tgsi_tex_src_requires_loading(ctx, 0)) ||
-					     read_compressed_msaa;
+					     read_compressed_msaa || txf_add_offsets;
+
 	boolean src_loaded = FALSE;
 	unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1;
 	int8_t offset_x = 0, offset_y = 0, offset_z = 0;
@@ -4375,15 +4432,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		}
 	}

-	/* get offset values */
-	if (inst->Texture.NumOffsets) {
-		assert(inst->Texture.NumOffsets == 1);
-
-		offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
-		offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
-		offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
-	}
-
 	if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
 		/* TGSI moves the sampler to src reg 3 for TXD */
 		sampler_src_reg = 3;
@@ -4736,6 +4784,77 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		src_gpr = ctx->temp_reg;
 	}

+	/* get offset values */
+	if (inst->Texture.NumOffsets) {
+		assert(inst->Texture.NumOffsets == 1);
+
+		if (txf_add_offsets) {
+			/* Add the offsets for texelFetch manually. */
+			const struct tgsi_texture_offset *off = inst->TexOffsets;
+
+			switch (inst->Texture.Texture) {
+			case TGSI_TEXTURE_3D:
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+				alu.op = ALU_OP2_ADD_INT;
+				alu.src[0].sel = src_gpr;
+				alu.src[0].chan = 2;
+				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ];
+				alu.dst.sel = src_gpr;
+				alu.dst.chan = 2;
+				alu.dst.write = 1;
+				alu.last = 1;
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
+				if (r)
+					return r;
+				/* fall through */
+
+			case TGSI_TEXTURE_2D:
+			case TGSI_TEXTURE_SHADOW2D:
+			case TGSI_TEXTURE_RECT:
+			case TGSI_TEXTURE_SHADOWRECT:
+			case TGSI_TEXTURE_2D_ARRAY:
+			case TGSI_TEXTURE_SHADOW2D_ARRAY:
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+				alu.op = ALU_OP2_ADD_INT;
+				alu.src[0].sel = src_gpr;
+				alu.src[0].chan = 1;
+				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY];
+				alu.dst.sel = src_gpr;
+				alu.dst.chan = 1;
+				alu.dst.write = 1;
+				alu.last = 1;
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
+				if (r)
+					return r;
+				/* fall through */
+
+			case TGSI_TEXTURE_1D:
+			case TGSI_TEXTURE_SHADOW1D:
+			case TGSI_TEXTURE_1D_ARRAY:
+			case TGSI_TEXTURE_SHADOW1D_ARRAY:
+				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+				alu.op = ALU_OP2_ADD_INT;
+				alu.src[0].sel = src_gpr;
+				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+				alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX];
+				alu.dst.sel = src_gpr;
+				alu.dst.write = 1;
+				alu.last = 1;
+				r = r600_bytecode_add_alu(ctx->bc, &alu);
+				if (r)
+					return r;
+				break;
+				/* texture offsets do not apply to other texture targets */
+			}
+		} else {
+			offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1;
+			offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1;
+			offset_z = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1;
+		}
+	}
+
 	/* Obtain the sample index for reading a compressed MSAA color texture.
 	 * To read the FMASK, we use the ldfptr instruction, which tells us
 	 * where the samples are stored.
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -63,6 +63,7 @@ struct r600_shader {
 	boolean			vs_out_misc_write;
 	boolean			vs_out_point_size;
 	boolean			vs_out_layer;
+	boolean			vs_out_edgeflag;
 	boolean			has_txq_cube_array_z_comp;
 	boolean			uses_tex_buffers;
 	boolean                 gs_prim_id_input;
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -911,6 +911,10 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
+	if (rctx->b.chip_class == R700) {
+		rs->pa_cl_clip_cntl |=
+			S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard);
+	}
 	rs->multisample_enable = state->multisample;

 	/* offset */
@@ -968,19 +972,25 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 			       S_028C08_PIX_CENTER_HALF(state->half_pixel_center) |
 			       S_028C08_QUANT_MODE(V_028C08_X_1_256TH));
 	r600_store_context_reg(&rs->buffer, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp));
-	r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL,
-			       S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
-			       S_028814_CULL_FRONT(state->cull_face & PIPE_FACE_FRONT ? 1 : 0) |
-			       S_028814_CULL_BACK(state->cull_face & PIPE_FACE_BACK ? 1 : 0) |
-			       S_028814_FACE(!state->front_ccw) |
-			       S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
-			       S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
-			       S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
-			       S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
-						  state->fill_back != PIPE_POLYGON_MODE_FILL) |
-			       S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
-			       S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back)));
-	r600_store_context_reg(&rs->buffer, R_028350_SX_MISC, S_028350_MULTIPASS(state->rasterizer_discard));
+
+	rs->pa_su_sc_mode_cntl = S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) |
+				 S_028814_CULL_FRONT(state->cull_face & PIPE_FACE_FRONT ? 1 : 0) |
+				 S_028814_CULL_BACK(state->cull_face & PIPE_FACE_BACK ? 1 : 0) |
+				 S_028814_FACE(!state->front_ccw) |
+				 S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
+				 S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
+				 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
+				 S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
+									 state->fill_back != PIPE_POLYGON_MODE_FILL) |
+				 S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
+				 S_028814_POLYMODE_BACK_PTYPE(r600_translate_fill(state->fill_back));
+	if (rctx->b.chip_class == R700) {
+		r600_store_context_reg(&rs->buffer, R_028814_PA_SU_SC_MODE_CNTL, rs->pa_su_sc_mode_cntl);
+	}
+	if (rctx->b.chip_class == R600) {
+		r600_store_context_reg(&rs->buffer, R_028350_SX_MISC,
+				       S_028350_MULTIPASS(state->rasterizer_discard));
+	}
 	return rs;
 }

@@ -2052,6 +2062,11 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 		db_render_control |= S_028D0C_DEPTH_CLEAR_ENABLE(1);
 	}

+	/* RV770 workaround for a hang with 8x MSAA. */
+	if (rctx->b.family == CHIP_RV770 && a->log_samples == 3) {
+		db_render_override |= S_028D10_MAX_TILES_IN_DTT(6);
+	}
+
 	r600_write_context_reg_seq(cs, R_028D0C_DB_RENDER_CONTROL, 2);
 	radeon_emit(cs, db_render_control); /* R_028D0C_DB_RENDER_CONTROL */
 	radeon_emit(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */
@@ -2817,8 +2832,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx)

 	r600_store_context_reg(cb, R_0288A4_SQ_PGM_RESOURCES_FS, 0);

+	if (rctx->b.chip_class == R700)
+		r600_store_context_reg(cb, R_028350_SX_MISC, 0);
 	if (rctx->b.chip_class == R700 && rctx->screen->b.has_streamout)
 		r600_store_context_reg(cb, R_028354_SX_SURFACE_SYNC, S_028354_SURFACE_SYNC_MASK(0xf));
+
 	r600_store_context_reg(cb, R_028800_DB_DEPTH_CONTROL, 0);
 	if (rctx->screen->b.has_streamout) {
 		r600_store_context_reg(cb, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
@@ -2996,31 +3014,9 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 		S_02881C_VS_OUT_CCDIST0_VEC_ENA((rshader->clip_dist_write & 0x0F) != 0) |
 		S_02881C_VS_OUT_CCDIST1_VEC_ENA((rshader->clip_dist_write & 0xF0) != 0) |
 		S_02881C_VS_OUT_MISC_VEC_ENA(rshader->vs_out_misc_write) |
-		S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size);
-}
-
-static unsigned r600_conv_prim_to_gs_out(unsigned mode)
-{
-	static const int prim_conv[] = {
-		V_028A6C_OUTPRIM_TYPE_POINTLIST,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP
-	};
-	assert(mode < Elements(prim_conv));
-
-	return prim_conv[mode];
+		S_02881C_USE_VTX_POINT_SIZE(rshader->vs_out_point_size) |
+		S_02881C_USE_VTX_EDGE_FLAG(rshader->vs_out_edgeflag) |
+		S_02881C_USE_VTX_RENDER_TARGET_INDX(rshader->vs_out_layer);
 }

 void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1408,6 +1408,25 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
 		r600_emit_atom(rctx, rctx->atoms[i]);
 	}

+	/* On R6xx, CULL_FRONT=1 culls all points, lines, and rectangles,
+	 * even though it should have no effect on those. */
+	if (rctx->b.chip_class == R600 && rctx->rasterizer) {
+		unsigned su_sc_mode_cntl = rctx->rasterizer->pa_su_sc_mode_cntl;
+		unsigned prim = info.mode;
+
+		if (rctx->gs_shader) {
+			prim = rctx->gs_shader->current->shader.gs_output_prim;
+		}
+		prim = r600_conv_prim_to_gs_out(prim); /* decrease the number of types to 3 */
+
+		if (prim == V_028A6C_OUTPRIM_TYPE_POINTLIST ||
+		    prim == V_028A6C_OUTPRIM_TYPE_LINESTRIP ||
+		    info.mode == R600_PRIM_RECTANGLE_LIST) {
+			su_sc_mode_cntl &= C_028814_CULL_FRONT;
+		}
+		r600_write_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL, su_sc_mode_cntl);
+	}
+
 	/* Update start instance. */
 	if (rctx->last_start_instance != info.start_instance) {
 		r600_write_ctl_const(cs, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance);
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -543,6 +543,9 @@
 #define   S_028810_VTX_KILL_OR(x)                      (((x) & 0x1) << 21)
 #define   G_028810_VTX_KILL_OR(x)                      (((x) >> 21) & 0x1)
 #define   C_028810_VTX_KILL_OR                         0xFFDFFFFF
+#define   S_028810_DX_RASTERIZATION_KILL(x)            (((x) & 0x1) << 22) /* R700 only? */
+#define   G_028810_DX_RASTERIZATION_KILL(x)            (((x) >> 22) & 0x1)
+#define   C_028810_DX_RASTERIZATION_KILL               0xFFBFFFFF
 #define   S_028810_DX_LINEAR_ATTR_CLIP_ENA(x)          (((x) & 0x1) << 24)
 #define   G_028810_DX_LINEAR_ATTR_CLIP_ENA(x)          (((x) >> 24) & 0x1)
 #define   C_028810_DX_LINEAR_ATTR_CLIP_ENA             0xFEFFFFFF
@@ -818,6 +821,9 @@
 #define   S_028D10_IGNORE_SC_ZRANGE(x)                 (((x) & 0x1) << 17)
 #define   G_028D10_IGNORE_SC_ZRANGE(x)                 (((x) >> 17) & 0x1)
 #define   C_028D10_IGNORE_SC_ZRANGE                    0xFFFDFFFF
+#define   S_028D10_MAX_TILES_IN_DTT(x)                 (((x) & 0x1F) << 21)
+#define   G_028D10_MAX_TILES_IN_DTT(x)                 (((x) >> 21) & 0x1F)
+#define   C_028D10_MAX_TILES_IN_DTT                    0xFC1FFFFF
 #define R_02880C_DB_SHADER_CONTROL                    0x02880C
 #define   S_02880C_Z_EXPORT_ENABLE(x)                  (((x) & 0x1) << 0)
 #define   G_02880C_Z_EXPORT_ENABLE(x)                  (((x) >> 0) & 0x1)
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -390,6 +390,15 @@ r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
 				(struct pipe_resource *)res);
 }

+static inline unsigned r600_tex_aniso_filter(unsigned filter)
+{
+	if (filter <= 1)   return 0;
+	if (filter <= 2)   return 1;
+	if (filter <= 4)   return 2;
+	if (filter <= 8)   return 3;
+	 /* else */        return 4;
+}
+
 #define R600_ERR(fmt, args...) \
 	fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)

--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -296,6 +296,12 @@ void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
 	fmask.nsamples = 1;
 	fmask.flags |= RADEON_SURF_FMASK;

+	/* Force 2D tiling if it wasn't set. This may occur when creating
+	 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
+	 * destination buffer must have an FMASK too. */
+	fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
+	fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+
 	if (rscreen->chip_class >= SI) {
 		fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
 	}
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -151,7 +151,7 @@ static void si_update_descriptors(struct si_context *sctx,
 			7 + /* copy */
 			(4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */
 			4; /* pointer update */
-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS
 		if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
 		    desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0)
 			desc->atom.num_dw += 4; /* second pointer update */
@@ -176,7 +176,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
 	radeon_emit(cs, va);
 	radeon_emit(cs, va >> 32);

-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS
 	if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
 	    desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) {
 		radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -269,7 +269,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 		return 256;

 	case PIPE_CAP_GLSL_FEATURE_LEVEL:
-		return HAVE_LLVM >= 0x0305 ? 330 : 140;
+		return (LLVM_SUPPORTS_GEOM_SHADERS) ? 330 : 140;

 	case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
 		return 1;
@@ -307,11 +307,14 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)

 	/* Texturing. */
 	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
 	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-			return 15;
+		return 15; /* 16384 */
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+		/* textures support 8192, but layered rendering supports 2048 */
+		return 12;
 	case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-		return 16384;
+		/* textures support 8192, but layered rendering supports 2048 */
+		return 2048;
 	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
 		return HAVE_LLVM >= 0x0305 ? 48 : 32;

@@ -346,7 +349,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
 	case PIPE_SHADER_VERTEX:
 		break;
 	case PIPE_SHADER_GEOMETRY:
-#if HAVE_LLVM < 0x0305
+#if !(LLVM_SUPPORTS_GEOM_SHADERS)
 		return 0;
 #endif
 		break;
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -39,6 +39,10 @@

 #define SI_MAX_DRAW_CS_DWORDS 18

+#define LLVM_SUPPORTS_GEOM_SHADERS \
+	((HAVE_LLVM >= 0x0305) || \
+	(HAVE_LLVM == 0x0304 && LLVM_VERSION_PATCH >= 1))
+
 struct si_pipe_compute;

 struct si_screen {
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2307,7 +2307,7 @@ static void *si_create_fs_state(struct pipe_context *ctx,
 	return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
 }

-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS

 static void *si_create_gs_state(struct pipe_context *ctx,
 				const struct pipe_shader_state *state)
@@ -2337,7 +2337,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 	sctx->vs_shader = sel;
 }

-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS

 static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
 {
@@ -2396,7 +2396,7 @@ static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
 	si_delete_shader_selector(ctx, sel);
 }

-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS

 static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
 {
@@ -2723,16 +2723,15 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
 	rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
 			  S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
 			  S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
-			  (state->max_anisotropy & 0x7) << 9 | /* XXX */
+			  r600_tex_aniso_filter(state->max_anisotropy) << 9 |
 			  S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
 			  S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
-			  aniso_flag_offset << 16 | /* XXX */
 			  S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
 	rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
 			  S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
 	rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
-			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
-			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
+			  S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
+			  S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
 			  S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
 	rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);

@@ -2890,7 +2889,7 @@ static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count,
 	si_set_sampler_states(sctx, pm4, count, states,
 			      &sctx->samplers[PIPE_SHADER_VERTEX],
 			      R_00B130_SPI_SHADER_USER_DATA_VS_0);
-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS
 	si_set_sampler_states(sctx, pm4, count, states,
 			      &sctx->samplers[PIPE_SHADER_VERTEX],
 			      R_00B330_SPI_SHADER_USER_DATA_ES_0);
@@ -3166,7 +3165,7 @@ void si_init_state_functions(struct si_context *sctx)
 	sctx->b.b.bind_fs_state = si_bind_ps_shader;
 	sctx->b.b.delete_vs_state = si_delete_vs_shader;
 	sctx->b.b.delete_fs_state = si_delete_ps_shader;
-#if HAVE_LLVM >= 0x0305
+#if LLVM_SUPPORTS_GEOM_SHADERS
 	sctx->b.b.create_gs_state = si_create_gs_state;
 	sctx->b.b.bind_gs_state = si_bind_gs_shader;
 	sctx->b.b.delete_gs_state = si_delete_gs_shader;
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -60,7 +60,7 @@ softpipe_resource_layout(struct pipe_screen *screen,
   unsigned width = pt->width0;
   unsigned height = pt->height0;
   unsigned depth = pt->depth0;
-   unsigned buffer_size = 0;
+   uint64_t buffer_size = 0;

   for (level = 0; level <= pt->last_level; level++) {
      unsigned slices;
@@ -76,8 +76,8 @@ softpipe_resource_layout(struct pipe_screen *screen,

      spr->level_offset[level] = buffer_size;

-      buffer_size += (util_format_get_nblocksy(pt->format, height) *
-                      slices * spr->stride[level]);
+      buffer_size += (uint64_t) util_format_get_nblocksy(pt->format, height) *
+                     slices * spr->stride[level];

      width  = u_minify(width, 1);
      height = u_minify(height, 1);
--- a/src/gallium/drivers/svga/Makefile.sources
+++ b/src/gallium/drivers/svga/Makefile.sources
@@ -24,6 +24,7 @@ C_SOURCES := \
 	svga_pipe_vs.c \
 	svga_screen.c \
 	svga_screen_cache.c \
+	svga_shader.c \
 	svga_state.c \
 	svga_state_need_swtnl.c \
 	svga_state_constants.c \
--- a/src/gallium/drivers/svga/include/includeCheck.h
+++ b/src/gallium/drivers/svga/include/includeCheck.h
@@ -0,0 +1 @@
+/* dummy file */
--- a/src/gallium/drivers/svga/include/svga3d_caps.h
+++ b/src/gallium/drivers/svga/include/svga3d_caps.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -41,6 +41,14 @@
 #ifndef _SVGA3D_CAPS_H_
 #define _SVGA3D_CAPS_H_

+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_USERLEVEL
+
+#include "includeCheck.h"
+
+#include <string.h>
+#include "svga_reg.h"
+
 #define SVGA_FIFO_3D_CAPS_SIZE   (SVGA_FIFO_3D_CAPS_LAST - \
                                  SVGA_FIFO_3D_CAPS + 1)

@@ -72,10 +80,12 @@ typedef enum {
 */

 typedef
+#include "vmware_pack_begin.h"
 struct SVGA3dCapsRecordHeader {
   uint32 length;
   SVGA3dCapsRecordType type;
 }
+#include "vmware_pack_end.h"
 SVGA3dCapsRecordHeader;


@@ -89,51 +99,16 @@ SVGA3dCapsRecordHeader;
 */

 typedef
+#include "vmware_pack_begin.h"
 struct SVGA3dCapsRecord {
   SVGA3dCapsRecordHeader header;
   uint32 data[1];
 }
+#include "vmware_pack_end.h"
 SVGA3dCapsRecord;


 typedef uint32 SVGA3dCapPair[2];


-/*
- *----------------------------------------------------------------------
- *
- * SVGA3dCaps_FindRecord
- *
- *    Finds the record with the highest-valued type within the given range
- *    in the caps block.
- *
- *    Result: pointer to found record, or NULL if not found.
- *
- *----------------------------------------------------------------------
- */
-
-static INLINE SVGA3dCapsRecord *
-SVGA3dCaps_FindRecord(const uint32 *capsBlock,
-                      SVGA3dCapsRecordType recordTypeMin,
-                      SVGA3dCapsRecordType recordTypeMax)
-{
-   SVGA3dCapsRecord *record, *found = NULL;
-   uint32 offset;
-
-   /*
-    * Search linearly through the caps block records for the specified type.
-    */
-   for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) {
-      record = (SVGA3dCapsRecord *) (capsBlock + offset);
-      if ((record->header.type >= recordTypeMin) &&
-          (record->header.type <= recordTypeMax) &&
-          (!found || (record->header.type > found->header.type))) {
-         found = record;
-      }
-   }
-
-   return found;
-}
-
-
 #endif // _SVGA3D_CAPS_H_
--- a/src/gallium/drivers/svga/include/svga3d_cmd.h
+++ b/src/gallium/drivers/svga/include/svga3d_cmd.h
--- a/src/gallium/drivers/svga/include/svga3d_devcaps.h
+++ b/src/gallium/drivers/svga/include/svga3d_devcaps.h
@@ -0,0 +1,236 @@
+/**********************************************************
+ * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_devcaps.h --
+ *
+ *       SVGA 3d caps definitions
+ */
+
+#ifndef _SVGA3D_DEVCAPS_H_
+#define _SVGA3D_DEVCAPS_H_
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+
+#include "includeCheck.h"
+
+/*
+ * 3D Hardware Version
+ *
+ *   The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo
+ *   register.   Is set by the host and read by the guest.  This lets
+ *   us make new guest drivers which are backwards-compatible with old
+ *   SVGA hardware revisions.  It does not let us support old guest
+ *   drivers.  Good enough for now.
+ *
+ */
+
+#define SVGA3D_MAKE_HWVERSION(major, minor)      (((major) << 16) | ((minor) & 0xFF))
+#define SVGA3D_MAJOR_HWVERSION(version)          ((version) >> 16)
+#define SVGA3D_MINOR_HWVERSION(version)          ((version) & 0xFF)
+
+typedef enum {
+   SVGA3D_HWVERSION_WS5_RC1   = SVGA3D_MAKE_HWVERSION(0, 1),
+   SVGA3D_HWVERSION_WS5_RC2   = SVGA3D_MAKE_HWVERSION(0, 2),
+   SVGA3D_HWVERSION_WS51_RC1  = SVGA3D_MAKE_HWVERSION(0, 3),
+   SVGA3D_HWVERSION_WS6_B1    = SVGA3D_MAKE_HWVERSION(1, 1),
+   SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
+   SVGA3D_HWVERSION_WS65_B1   = SVGA3D_MAKE_HWVERSION(2, 0),
+   SVGA3D_HWVERSION_WS8_B1    = SVGA3D_MAKE_HWVERSION(2, 1),
+   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS8_B1,
+} SVGA3dHardwareVersion;
+
+/*
+ * DevCap indexes.
+ */
+
+typedef enum {
+   SVGA3D_DEVCAP_INVALID                           = ((uint32)-1),
+   SVGA3D_DEVCAP_3D                                = 0,
+   SVGA3D_DEVCAP_MAX_LIGHTS                        = 1,
+
+   /*
+    * SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of
+    * fixed-function texture units available. Each of these units
+    * work in both FFP and Shader modes, and they support texture
+    * transforms and texture coordinates. The host may have additional
+    * texture image units that are only usable with shaders.
+    */
+   SVGA3D_DEVCAP_MAX_TEXTURES                      = 2,
+   SVGA3D_DEVCAP_MAX_CLIP_PLANES                   = 3,
+   SVGA3D_DEVCAP_VERTEX_SHADER_VERSION             = 4,
+   SVGA3D_DEVCAP_VERTEX_SHADER                     = 5,
+   SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION           = 6,
+   SVGA3D_DEVCAP_FRAGMENT_SHADER                   = 7,
+   SVGA3D_DEVCAP_MAX_RENDER_TARGETS                = 8,
+   SVGA3D_DEVCAP_S23E8_TEXTURES                    = 9,
+   SVGA3D_DEVCAP_S10E5_TEXTURES                    = 10,
+   SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND             = 11,
+   SVGA3D_DEVCAP_D16_BUFFER_FORMAT                 = 12,
+   SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT               = 13,
+   SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT               = 14,
+   SVGA3D_DEVCAP_QUERY_TYPES                       = 15,
+   SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING         = 16,
+   SVGA3D_DEVCAP_MAX_POINT_SIZE                    = 17,
+   SVGA3D_DEVCAP_MAX_SHADER_TEXTURES               = 18,
+   SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH                 = 19,
+   SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT                = 20,
+   SVGA3D_DEVCAP_MAX_VOLUME_EXTENT                 = 21,
+   SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT                = 22,
+   SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO          = 23,
+   SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY            = 24,
+   SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT               = 25,
+   SVGA3D_DEVCAP_MAX_VERTEX_INDEX                  = 26,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS    = 27,
+   SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS  = 28,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS           = 29,
+   SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS         = 30,
+   SVGA3D_DEVCAP_TEXTURE_OPS                       = 31,
+   SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8               = 32,
+   SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8               = 33,
+   SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10            = 34,
+   SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5               = 35,
+   SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5               = 36,
+   SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4               = 37,
+   SVGA3D_DEVCAP_SURFACEFMT_R5G6B5                 = 38,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16            = 39,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8      = 40,
+   SVGA3D_DEVCAP_SURFACEFMT_ALPHA8                 = 41,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8             = 42,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D16                  = 43,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8                = 44,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8                = 45,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT1                   = 46,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT2                   = 47,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT3                   = 48,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT4                   = 49,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT5                   = 50,
+   SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8           = 51,
+   SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10            = 52,
+   SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8               = 53,
+   SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8               = 54,
+   SVGA3D_DEVCAP_SURFACEFMT_CxV8U8                 = 55,
+   SVGA3D_DEVCAP_SURFACEFMT_R_S10E5                = 56,
+   SVGA3D_DEVCAP_SURFACEFMT_R_S23E8                = 57,
+   SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5               = 58,
+   SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8               = 59,
+   SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5             = 60,
+   SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8             = 61,
+
+   /*
+    * There is a hole in our devcap definitions for
+    * historical reasons.
+    *
+    * Define a constant just for completeness.
+    */
+   SVGA3D_DEVCAP_MISSING62                         = 62,
+
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES        = 63,
+
+   /*
+    * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color
+    * render targets.  This does not include the depth or stencil targets.
+    */
+   SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS   = 64,
+
+   SVGA3D_DEVCAP_SURFACEFMT_V16U16                 = 65,
+   SVGA3D_DEVCAP_SURFACEFMT_G16R16                 = 66,
+   SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16           = 67,
+   SVGA3D_DEVCAP_SURFACEFMT_UYVY                   = 68,
+   SVGA3D_DEVCAP_SURFACEFMT_YUY2                   = 69,
+   SVGA3D_DEVCAP_MULTISAMPLE_NONMASKABLESAMPLES    = 70,
+   SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES       = 71,
+   SVGA3D_DEVCAP_ALPHATOCOVERAGE                   = 72,
+   SVGA3D_DEVCAP_SUPERSAMPLE                       = 73,
+   SVGA3D_DEVCAP_AUTOGENMIPMAPS                    = 74,
+   SVGA3D_DEVCAP_SURFACEFMT_NV12                   = 75,
+   SVGA3D_DEVCAP_SURFACEFMT_AYUV                   = 76,
+
+   /*
+    * This is the maximum number of SVGA context IDs that the guest
+    * can define using SVGA_3D_CMD_CONTEXT_DEFINE.
+    */
+   SVGA3D_DEVCAP_MAX_CONTEXT_IDS                   = 77,
+
+   /*
+    * This is the maximum number of SVGA surface IDs that the guest
+    * can define using SVGA_3D_CMD_SURFACE_DEFINE*.
+    */
+   SVGA3D_DEVCAP_MAX_SURFACE_IDS                   = 78,
+
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF16                 = 79,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF24                 = 80,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT            = 81,
+
+   SVGA3D_DEVCAP_SURFACEFMT_ATI1                   = 82,
+   SVGA3D_DEVCAP_SURFACEFMT_ATI2                   = 83,
+
+   /*
+    * Deprecated.
+    */
+   SVGA3D_DEVCAP_DEAD1                             = 84,
+
+   /*
+    * This contains several SVGA_3D_CAPS_VIDEO_DECODE elements
+    * ored together, one for every type of video decoding supported.
+    */
+   SVGA3D_DEVCAP_VIDEO_DECODE                      = 85,
+
+   /*
+    * This contains several SVGA_3D_CAPS_VIDEO_PROCESS elements
+    * ored together, one for every type of video processing supported.
+    */
+   SVGA3D_DEVCAP_VIDEO_PROCESS                     = 86,
+
+   SVGA3D_DEVCAP_LINE_AA                           = 87,  /* boolean */
+   SVGA3D_DEVCAP_LINE_STIPPLE                      = 88,  /* boolean */
+   SVGA3D_DEVCAP_MAX_LINE_WIDTH                    = 89,  /* float */
+   SVGA3D_DEVCAP_MAX_AA_LINE_WIDTH                 = 90,  /* float */
+
+   SVGA3D_DEVCAP_SURFACEFMT_YV12                   = 91,
+
+   /*
+    * Does the host support the SVGA logic ops commands?
+    */
+   SVGA3D_DEVCAP_LOGICOPS                          = 92,
+
+   /*
+    * Are TS_CONSTANT, TS_COLOR_KEY, and TS_COLOR_KEY_ENABLE supported?
+    */
+   SVGA3D_DEVCAP_TS_COLOR_KEY                      = 93, /* boolean */
+
+   SVGA3D_DEVCAP_MAX                       /* This must be the last index. */
+} SVGA3dDevCapIndex;
+
+typedef union {
+   Bool   b;
+   uint32 u;
+   int32  i;
+   float  f;
+} SVGA3dDevCapResult;
+
+#endif // _SVGA3D_DEVCAPS_H_
--- a/src/gallium/drivers/svga/include/svga3d_limits.h
+++ b/src/gallium/drivers/svga/include/svga3d_limits.h
@@ -0,0 +1,101 @@
+/**********************************************************
+ * Copyright 2007-2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_limits.h --
+ *
+ *       SVGA 3d hardware limits
+ */
+
+#ifndef _SVGA3D_LIMITS_H_
+#define _SVGA3D_LIMITS_H_
+
+#define INCLUDE_ALLOW_MODULE
+#define INCLUDE_ALLOW_USERLEVEL
+#define INCLUDE_ALLOW_VMCORE
+
+#include "includeCheck.h"
+
+#define SVGA3D_NUM_CLIPPLANES                   6
+#define SVGA3D_MAX_RENDER_TARGETS               8
+#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  (SVGA3D_MAX_RENDER_TARGETS)
+#define SVGA3D_MAX_CONTEXT_IDS                  256
+#define SVGA3D_MAX_SURFACE_IDS                  (32 * 1024)
+
+/*
+ * Maximum ID a shader can be assigned on a given context.
+ */
+#define SVGA3D_MAX_SHADERIDS                    5000
+/*
+ * Maximum number of shaders of a given type that can be defined
+ * (including all contexts).
+ */
+#define SVGA3D_MAX_SIMULTANEOUS_SHADERS         20000
+
+#define SVGA3D_NUM_TEXTURE_UNITS                32
+#define SVGA3D_NUM_LIGHTS                       8
+#define SVGA3D_MAX_VIDEODECODERS                8
+#define SVGA3D_MAX_VIDEOPROCESSORS              8
+#define SVGA3D_MAX_VIDEODECODER_FRAMES          400
+
+/*
+ * Maximum size in dwords of shader text the SVGA device will allow.
+ * Currently 8 MB.
+ */
+#define SVGA3D_MAX_SHADER_MEMORY  (8 * 1024 * 1024 / sizeof(uint32))
+
+#define SVGA3D_MAX_CLIP_PLANES    6
+
+/*
+ * This is the limit to the number of fixed-function texture
+ * transforms and texture coordinates we can support. It does *not*
+ * correspond to the number of texture image units (samplers) we
+ * support!
+ */
+#define SVGA3D_MAX_TEXTURE_COORDS 8
+
+/*
+ * Number of faces in a cubemap.
+ */
+#define SVGA3D_MAX_SURFACE_FACES 6
+
+/*
+ * Maximum number of array indexes in a GB surface (with DX enabled).
+ */
+#define SVGA3D_MAX_SURFACE_ARRAYSIZE 512
+
+/*
+ * The maximum number of vertex arrays we're guaranteed to support in
+ * SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_VERTEX_ARRAYS   32
+
+/*
+ * The maximum number of primitive ranges we're guaranteed to support
+ * in SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
+
+#endif // _SVGA3D_LIMITS_H_
--- a/src/gallium/drivers/svga/include/svga3d_reg.h
+++ b/src/gallium/drivers/svga/include/svga3d_reg.h
--- a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
+++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -78,6 +78,7 @@ typedef struct {
 #define SVGA3D_VS_10 ((SVGA3D_VS_TYPE << 16) | 1 << 8)
 #define SVGA3D_VS_11 (SVGA3D_VS_10 | 1)
 #define SVGA3D_VS_20 ((SVGA3D_VS_TYPE << 16) | 2 << 8)
+#define SVGA3D_VS_21 (SVGA3D_VS_20 | 1)
 #define SVGA3D_VS_30 ((SVGA3D_VS_TYPE << 16) | 3 << 8)

 #define SVGA3D_PS_10 ((SVGA3D_PS_TYPE << 16) | 1 << 8)
@@ -86,6 +87,7 @@ typedef struct {
 #define SVGA3D_PS_13 (SVGA3D_PS_10 | 3)
 #define SVGA3D_PS_14 (SVGA3D_PS_10 | 4)
 #define SVGA3D_PS_20 ((SVGA3D_PS_TYPE << 16) | 2 << 8)
+#define SVGA3D_PS_21 (SVGA3D_PS_20 | 1)
 #define SVGA3D_PS_30 ((SVGA3D_PS_TYPE << 16) | 3 << 8)

 /* The *_ENABLED are for backwards compatibility with old drivers */
@@ -268,28 +270,13 @@ typedef enum {

 typedef enum {
   SVGA3DSAMP_UNKNOWN = 0, /* Uninitialized value */
-   SVGA3DSAMP_2D = 2,      /* dcl_2d s# (for declaring a 2-D texture) */
+   SVGA3DSAMP_2D = 2,      /* dcl_2d s# (for declaring a 2D texture) */
   SVGA3DSAMP_CUBE,        /* dcl_cube s# (for declaring a cube texture) */
   SVGA3DSAMP_VOLUME,      /* dcl_volume s# (for declaring a volume texture) */
+   SVGA3DSAMP_2D_SHADOW,   /* dcl_2d s# (for declaring a 2D shadow texture) */
+   SVGA3DSAMP_MAX,
 } SVGA3dShaderSamplerType;

-/* SVGA3D sampler format classes */
-
-typedef enum {
-   SVGA3DSAMPFORMAT_ARGB,        /* ARGB formats */
-   SVGA3DSAMPFORMAT_V8U8,        /* Sign and normalize (SNORM) V & U */
-   SVGA3DSAMPFORMAT_Q8W8V8U8,    /* SNORM all */
-   SVGA3DSAMPFORMAT_CxV8U8,      /* SNORM V & U, C=SQRT(1-U^2-V^2) */
-   SVGA3DSAMPFORMAT_X8L8V8U8,    /* SNORM V & U */
-   SVGA3DSAMPFORMAT_A2W10V10U10, /* SNORM W, V & U */
-   SVGA3DSAMPFORMAT_DXT_PMA,     /* DXT pre-multiplied alpha */
-   SVGA3DSAMPFORMAT_YUV,         /* YUV video format */
-   SVGA3DSAMPFORMAT_UYVY,        /* UYVY video format */
-   SVGA3DSAMPFORMAT_Rx,          /* R16F/32F */
-   SVGA3DSAMPFORMAT_RxGx,        /* R16FG16F, R32FG32F */
-   SVGA3DSAMPFORMAT_V16U16,      /* SNORM all */
-} SVGA3DShaderSamplerFormatClass;
-
 /* SVGA3D write mask */

 #define SVGA3DWRITEMASK_0    1 /* Component 0 (X;Red) */
@@ -315,6 +302,18 @@ typedef enum {

 #define SVGA3DDSTMOD_MSAMPCENTROID     4

+/* SVGA3D destination shift scale */
+
+typedef enum {
+   SVGA3DDSTSHFSCALE_X1 = 0,  /* 1.0 */
+   SVGA3DDSTSHFSCALE_X2 = 1,  /* 2.0 */
+   SVGA3DDSTSHFSCALE_X4 = 2,  /* 4.0 */
+   SVGA3DDSTSHFSCALE_X8 = 3,  /* 8.0 */
+   SVGA3DDSTSHFSCALE_D8 = 13, /* 0.125 */
+   SVGA3DDSTSHFSCALE_D4 = 14, /* 0.25 */
+   SVGA3DDSTSHFSCALE_D2 = 15  /* 0.5 */
+} SVGA3dShaderDstShfScaleType;
+
 /* SVGA3D source swizzle */

 #define SVGA3DSWIZZLE_REPLICATEX 0x00
--- a/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
+++ b/src/gallium/drivers/svga/include/svga3d_surfacedefs.h
@@ -0,0 +1,911 @@
+/**********************************************************
+ * Copyright 1998-2014 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_surfacedefs.h --
+ *
+ *       Surface/format/image helper code.
+ */
+
+#include "svga3d_reg.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0]))
+#endif
+
+#define DIV_ROUND_UP(x, y)  (((x) + (y) - 1) / (y))
+
+#define max_t(type, x, y)  ((x) > (y) ? (x) : (y))
+
+/*
+ * enum svga3d_block_desc describes the active data channels in a block.
+ *
+ * There can be at-most four active channels in a block:
+ *    1. Red, bump W, luminance and depth are stored in the first channel.
+ *    2. Green, bump V and stencil are stored in the second channel.
+ *    3. Blue and bump U are stored in the third channel.
+ *    4. Alpha and bump Q are stored in the fourth channel.
+ *
+ * Block channels can be used to store compressed and buffer data:
+ *    1. For compressed formats, only the data channel is used and its size
+ *       is equal to that of a singular block in the compression scheme.
+ *    2. For buffer formats, only the data channel is used and its size is
+ *       exactly one byte in length.
+ *    3. In each case the bit depth represent the size of a singular block.
+ *
+ * Note: Compressed and IEEE formats do not use the bitMask structure.
+ */
+
+enum svga3d_block_desc {
+	SVGA3DBLOCKDESC_NONE        = 0,         /* No channels are active */
+	SVGA3DBLOCKDESC_BLUE        = 1 << 0,    /* Block with red channel
+						    data */
+	SVGA3DBLOCKDESC_U           = 1 << 0,    /* Block with bump U channel
+						    data */
+	SVGA3DBLOCKDESC_UV_VIDEO    = 1 << 7,    /* Block with alternating video
+						    U and V */
+	SVGA3DBLOCKDESC_GREEN       = 1 << 1,    /* Block with green channel
+						    data */
+	SVGA3DBLOCKDESC_V           = 1 << 1,    /* Block with bump V channel
+						    data */
+	SVGA3DBLOCKDESC_STENCIL     = 1 << 1,    /* Block with a stencil
+						    channel */
+	SVGA3DBLOCKDESC_RED         = 1 << 2,    /* Block with blue channel
+						    data */
+	SVGA3DBLOCKDESC_W           = 1 << 2,    /* Block with bump W channel
+						    data */
+	SVGA3DBLOCKDESC_LUMINANCE   = 1 << 2,    /* Block with luminance channel
+						    data */
+	SVGA3DBLOCKDESC_Y           = 1 << 2,    /* Block with video luminance
+						    data */
+	SVGA3DBLOCKDESC_DEPTH       = 1 << 2,    /* Block with depth channel */
+	SVGA3DBLOCKDESC_ALPHA       = 1 << 3,    /* Block with an alpha
+						    channel */
+	SVGA3DBLOCKDESC_Q           = 1 << 3,    /* Block with bump Q channel
+						    data */
+	SVGA3DBLOCKDESC_BUFFER      = 1 << 4,    /* Block stores 1 byte of
+						    data */
+	SVGA3DBLOCKDESC_COMPRESSED  = 1 << 5,    /* Block stores n bytes of
+						    data depending on the
+						    compression method used */
+	SVGA3DBLOCKDESC_IEEE_FP     = 1 << 6,    /* Block stores data in an IEEE
+						    floating point
+						    representation in
+						    all channels */
+	SVGA3DBLOCKDESC_PLANAR_YUV  = 1 << 8,    /* Three separate blocks store
+						    data. */
+	SVGA3DBLOCKDESC_U_VIDEO     = 1 << 9,    /* Block with U video data */
+	SVGA3DBLOCKDESC_V_VIDEO     = 1 << 10,   /* Block with V video data */
+	SVGA3DBLOCKDESC_EXP         = 1 << 11,   /* Shared exponent */
+	SVGA3DBLOCKDESC_SRGB        = 1 << 12,   /* Data is in sRGB format */
+	SVGA3DBLOCKDESC_2PLANAR_YUV = 1 << 13,   /* 2 planes of Y, UV,
+						    e.g., NV12. */
+	SVGA3DBLOCKDESC_3PLANAR_YUV = 1 << 14,   /* 3 planes of separate
+						    Y, U, V, e.g., YV12. */
+
+	SVGA3DBLOCKDESC_RG         = SVGA3DBLOCKDESC_RED |
+	SVGA3DBLOCKDESC_GREEN,
+	SVGA3DBLOCKDESC_RGB        = SVGA3DBLOCKDESC_RG |
+	SVGA3DBLOCKDESC_BLUE,
+	SVGA3DBLOCKDESC_RGB_SRGB   = SVGA3DBLOCKDESC_RGB |
+	SVGA3DBLOCKDESC_SRGB,
+	SVGA3DBLOCKDESC_RGBA       = SVGA3DBLOCKDESC_RGB |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_RGBA_SRGB  = SVGA3DBLOCKDESC_RGBA |
+	SVGA3DBLOCKDESC_SRGB,
+	SVGA3DBLOCKDESC_UV         = SVGA3DBLOCKDESC_U |
+	SVGA3DBLOCKDESC_V,
+	SVGA3DBLOCKDESC_UVL        = SVGA3DBLOCKDESC_UV |
+	SVGA3DBLOCKDESC_LUMINANCE,
+	SVGA3DBLOCKDESC_UVW        = SVGA3DBLOCKDESC_UV |
+	SVGA3DBLOCKDESC_W,
+	SVGA3DBLOCKDESC_UVWA       = SVGA3DBLOCKDESC_UVW |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_UVWQ       = SVGA3DBLOCKDESC_U |
+	SVGA3DBLOCKDESC_V |
+	SVGA3DBLOCKDESC_W |
+	SVGA3DBLOCKDESC_Q,
+	SVGA3DBLOCKDESC_LA         = SVGA3DBLOCKDESC_LUMINANCE |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_R_FP       = SVGA3DBLOCKDESC_RED |
+	SVGA3DBLOCKDESC_IEEE_FP,
+	SVGA3DBLOCKDESC_RG_FP      = SVGA3DBLOCKDESC_R_FP |
+	SVGA3DBLOCKDESC_GREEN,
+	SVGA3DBLOCKDESC_RGB_FP     = SVGA3DBLOCKDESC_RG_FP |
+	SVGA3DBLOCKDESC_BLUE,
+	SVGA3DBLOCKDESC_RGBA_FP    = SVGA3DBLOCKDESC_RGB_FP |
+	SVGA3DBLOCKDESC_ALPHA,
+	SVGA3DBLOCKDESC_DS         = SVGA3DBLOCKDESC_DEPTH |
+	SVGA3DBLOCKDESC_STENCIL,
+	SVGA3DBLOCKDESC_YUV        = SVGA3DBLOCKDESC_UV_VIDEO |
+	SVGA3DBLOCKDESC_Y,
+	SVGA3DBLOCKDESC_AYUV       = SVGA3DBLOCKDESC_ALPHA |
+	SVGA3DBLOCKDESC_Y |
+	SVGA3DBLOCKDESC_U_VIDEO |
+	SVGA3DBLOCKDESC_V_VIDEO,
+	SVGA3DBLOCKDESC_RGBE       = SVGA3DBLOCKDESC_RGB |
+	SVGA3DBLOCKDESC_EXP,
+	SVGA3DBLOCKDESC_COMPRESSED_SRGB = SVGA3DBLOCKDESC_COMPRESSED |
+	SVGA3DBLOCKDESC_SRGB,
+	SVGA3DBLOCKDESC_NV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+	SVGA3DBLOCKDESC_2PLANAR_YUV,
+	SVGA3DBLOCKDESC_YV12       = SVGA3DBLOCKDESC_PLANAR_YUV |
+	SVGA3DBLOCKDESC_3PLANAR_YUV,
+};
+
+/*
+ * SVGA3dSurfaceDesc describes the actual pixel data.
+ *
+ * This structure provides the following information:
+ *    1. Block description.
+ *    2. Dimensions of a block in the surface.
+ *    3. Size of block in bytes.
+ *    4. Bit depth of the pixel data.
+ *    5. Channel bit depths and masks (if applicable).
+ */
+#define SVGA3D_CHANNEL_DEF(type)		\
+	struct {				\
+		union {				\
+			type blue;              \
+			type u;                 \
+			type uv_video;          \
+			type u_video;           \
+		};				\
+		union {				\
+			type green;             \
+			type v;                 \
+			type stencil;           \
+			type v_video;           \
+		};				\
+		union {				\
+			type red;               \
+			type w;                 \
+			type luminance;         \
+			type y;                 \
+			type depth;             \
+			type data;              \
+		};				\
+		union {				\
+			type alpha;             \
+			type q;                 \
+			type exp;               \
+		};				\
+	}
+
+struct svga3d_surface_desc {
+	enum svga3d_block_desc block_desc;
+	SVGA3dSize block_size;
+	uint32 bytes_per_block;
+	uint32 pitch_bytes_per_block;
+
+	struct {
+		uint32 total;
+		SVGA3D_CHANNEL_DEF(uint8);
+	} bit_depth;
+
+	struct {
+		SVGA3D_CHANNEL_DEF(uint8);
+	} bit_offset;
+};
+
+static const struct svga3d_surface_desc svga3d_surface_descs[] = {
+	{SVGA3DBLOCKDESC_NONE,
+	 {1, 1, 1},  0, 0, {0, {{0}, {0}, {0}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_FORMAT_INVALID */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_X8R8G8B8 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_A8R8G8B8 */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  2, 2, {16, {{5}, {6}, {5}, {0} } },
+	 {{{0}, {5}, {11}, {0} } } },    /* SVGA3D_R5G6B5 */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  2, 2, {15, {{5}, {5}, {5}, {0} } },
+	 {{{0}, {5}, {10}, {0} } } },    /* SVGA3D_X1R5G5B5 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  2, 2, {16, {{5}, {5}, {5}, {1} } },
+	 {{{0}, {5}, {10}, {15} } } },   /* SVGA3D_A1R5G5B5 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  2, 2, {16, {{4}, {4}, {4}, {4} } },
+	 {{{0}, {4}, {8}, {12} } } },    /* SVGA3D_A4R4G4B4 */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D32 */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_D16 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  2, 2, {16, {{0}, {1}, {15}, {0} } },
+	 {{{0}, {15}, {0}, {0} } } },    /* SVGA3D_Z_D15S1 */
+
+	{SVGA3DBLOCKDESC_LUMINANCE,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE8 */
+
+	{SVGA3DBLOCKDESC_LA,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {4}, {4} } },
+	 {{{0}, {0}, {0}, {4} } } },     /* SVGA3D_LUMINANCE4_ALPHA4 */
+
+	{SVGA3DBLOCKDESC_LUMINANCE,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_LUMINANCE16 */
+
+	{SVGA3DBLOCKDESC_LA,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
+	 {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_LUMINANCE8_ALPHA8 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT1 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT2 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT3 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT4 */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_DXT5 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {8}, {8} } },
+	 {{{0}, {0}, {0}, {8} } } },     /* SVGA3D_BUMPU8V8 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  2, 2, {16, {{5}, {5}, {6}, {0} } },
+	 {{{11}, {6}, {0}, {0} } } },    /* SVGA3D_BUMPL6V5U5 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {0} } },
+	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPX8L8V8U8 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  3, 3, {24, {{8}, {8}, {8}, {0} } },
+	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_BUMPL8V8U8 */
+
+	{SVGA3DBLOCKDESC_RGBA_FP,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_ARGB_S10E5 */
+
+	{SVGA3DBLOCKDESC_RGBA_FP,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_ARGB_S23E8 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2R10G10B10 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
+	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_V8U8 */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{24}, {16}, {8}, {0} } } },   /* SVGA3D_Q8W8V8U8 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {8}, {0}, {0} } },
+	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_CxV8U8 */
+
+	{SVGA3DBLOCKDESC_UVL,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{16}, {8}, {0}, {0} } } },    /* SVGA3D_X8L8V8U8 */
+
+	{SVGA3DBLOCKDESC_UVWA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_A2W10V10U10 */
+
+	{SVGA3DBLOCKDESC_ALPHA,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {0}, {8} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_ALPHA8 */
+
+	{SVGA3DBLOCKDESC_R_FP,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S10E5 */
+
+	{SVGA3DBLOCKDESC_R_FP,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R_S23E8 */
+
+	{SVGA3DBLOCKDESC_RG_FP,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_RG_S10E5 */
+
+	{SVGA3DBLOCKDESC_RG_FP,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_RG_S23E8 */
+
+	{SVGA3DBLOCKDESC_BUFFER,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BUFFER */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24X8 */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  4, 4, {32, {{16}, {16}, {0}, {0} } },
+	 {{{16}, {0}, {0}, {0} } } },    /* SVGA3D_V16U16 */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {0}, {16}, {0} } } },    /* SVGA3D_G16R16 */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_A16B16G16R16 */
+
+	{SVGA3DBLOCKDESC_YUV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {8}, {0} } } },     /* SVGA3D_UYVY */
+
+	{SVGA3DBLOCKDESC_YUV,
+	 {1, 1, 1},  2, 2, {16, {{8}, {0}, {8}, {0} } },
+	 {{{8}, {0}, {0}, {0} } } },     /* SVGA3D_YUY2 */
+
+	{SVGA3DBLOCKDESC_NV12,
+	 {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_NV12 */
+
+	{SVGA3DBLOCKDESC_AYUV,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_AYUV */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_UINT */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  16, 16, {128, {{32}, {32}, {32}, {32} } },
+	 {{{64}, {32}, {0}, {96} } } },  /* SVGA3D_R32G32B32A32_SINT */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGB_FP,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_FLOAT */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_UINT */
+
+	{SVGA3DBLOCKDESC_UVW,
+	 {1, 1, 1},  12, 12, {96, {{32}, {32}, {32}, {0} } },
+	 {{{64}, {32}, {0}, {0} } } },   /* SVGA3D_R32G32B32_SINT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_UINT */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SNORM */
+
+	{SVGA3DBLOCKDESC_UVWQ,
+	 {1, 1, 1},  8, 8, {64, {{16}, {16}, {16}, {16} } },
+	 {{{32}, {16}, {0}, {48} } } },  /* SVGA3D_R16G16B16A16_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_UINT */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  8, 8, {64, {{0}, {32}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G32_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_R32G8X24_TYPELESS */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {32}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_D32_FLOAT_S8X24_UINT */
+
+	{SVGA3DBLOCKDESC_R_FP,
+	 {1, 1, 1},  8, 8, {64, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },    /* SVGA3D_R32_FLOAT_X8_X24_TYPELESS */
+
+	{SVGA3DBLOCKDESC_GREEN,
+	 {1, 1, 1},  8, 8, {64, {{0}, {8}, {0}, {0} } },
+	 {{{0}, {32}, {0}, {0} } } },    /* SVGA3D_X32_TYPELESS_G8X24_UINT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10A2_UINT */
+
+	{SVGA3DBLOCKDESC_RGB_FP,
+	 {1, 1, 1},  4, 4, {32, {{10}, {11}, {11}, {0} } },
+	 {{{0}, {10}, {21}, {0} } } },  /* SVGA3D_R11G11B10_FLOAT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM */
+
+	{SVGA3DBLOCKDESC_RGBA_SRGB,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_UINT */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{16}, {8}, {0}, {24} } } },   /* SVGA3D_R8G8B8A8_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RG_FP,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_UINT */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  4, 4, {32, {{0}, {16}, {16}, {0} } },
+	 {{{0}, {16}, {0}, {0} } } },    /* SVGA3D_R16G16_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_TYPELESS */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_D32_FLOAT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_UINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {32}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R32_SINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_R24G8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_D24_UNORM_S8_UINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  4, 4, {32, {{0}, {0}, {24}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R24_UNORM_X8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_GREEN,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {0}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_X24_TYPELESS_G8_UINT */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UNORM */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_UINT */
+
+	{SVGA3DBLOCKDESC_UV,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UNORM */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_UINT */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SNORM */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R16_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UNORM */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_UINT */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SNORM */
+
+	{SVGA3DBLOCKDESC_U,
+	 {1, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R8_SINT */
+
+	{SVGA3DBLOCKDESC_RED,
+	 {8, 1, 1},  1, 1, {8, {{0}, {0}, {8}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_R1_UNORM */
+
+	{SVGA3DBLOCKDESC_RGBE,
+	 {1, 1, 1},  4, 4, {32, {{9}, {9}, {9}, {5} } },
+	 {{{18}, {9}, {0}, {27} } } },   /* SVGA3D_R9G9B9E5_SHAREDEXP */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_R8G8_B8G8_UNORM */
+
+	{SVGA3DBLOCKDESC_RG,
+	 {1, 1, 1},  2, 2, {16, {{0}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {0}, {0} } } },     /* SVGA3D_G8R8_G8B8_UNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC1_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC2_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED_SRGB,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC3_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_UNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  8, 8, {64, {{0}, {0}, {64}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC4_SNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_TYPELESS */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_UNORM */
+
+	{SVGA3DBLOCKDESC_COMPRESSED,
+	 {4, 4, 1},  16, 16, {128, {{0}, {0}, {128}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_BC5_SNORM */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{10}, {10}, {10}, {2} } },
+	 {{{0}, {10}, {20}, {30} } } },  /* SVGA3D_R10G10B10_XR_BIAS_A2_UNORM */
+
+	{SVGA3DBLOCKDESC_RGBA,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGBA_SRGB,
+	 {1, 1, 1},  4, 4, {32, {{8}, {8}, {8}, {8} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8A8_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_RGB,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_TYPELESS */
+
+	{SVGA3DBLOCKDESC_RGB_SRGB,
+	 {1, 1, 1},  4, 4, {24, {{8}, {8}, {8}, {0} } },
+	 {{{0}, {8}, {16}, {24} } } },   /* SVGA3D_B8G8R8X8_UNORM_SRGB */
+
+	{SVGA3DBLOCKDESC_DEPTH,
+	 {1, 1, 1},  2, 2, {16, {{0}, {0}, {16}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_Z_DF16 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_DF24 */
+
+	{SVGA3DBLOCKDESC_DS,
+	 {1, 1, 1},  4, 4, {32, {{0}, {8}, {24}, {0} } },
+	 {{{0}, {24}, {0}, {0} } } },    /* SVGA3D_Z_D24S8_INT */
+
+	{SVGA3DBLOCKDESC_YV12,
+	 {2, 2, 1},  6, 2, {48, {{0}, {0}, {48}, {0} } },
+	 {{{0}, {0}, {0}, {0} } } },     /* SVGA3D_YV12 */
+};
+
+
+extern const struct svga3d_surface_desc g_SVGA3dSurfaceDescs[];
+extern int g_SVGA3dSurfaceDescs_size;
+
+static inline uint32 clamped_umul32(uint32 a, uint32 b)
+{
+	uint64_t tmp = (uint64_t) a*b;
+	return (tmp > (uint64_t) ((uint32) -1)) ? (uint32) -1 : tmp;
+}
+
+static inline const struct svga3d_surface_desc *
+svga3dsurface_get_desc(SVGA3dSurfaceFormat format)
+{
+	if (format < ARRAY_SIZE(svga3d_surface_descs))
+		return &svga3d_surface_descs[format];
+
+	return &svga3d_surface_descs[SVGA3D_FORMAT_INVALID];
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * svga3dsurface_get_mip_size --
+ *
+ *      Given a base level size and the mip level, compute the size of
+ *      the mip level.
+ *
+ * Results:
+ *      See above.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static inline SVGA3dSize
+svga3dsurface_get_mip_size(SVGA3dSize base_level, uint32 mip_level)
+{
+	SVGA3dSize size;
+
+	size.width = max_t(uint32, base_level.width >> mip_level, 1);
+	size.height = max_t(uint32, base_level.height >> mip_level, 1);
+	size.depth = max_t(uint32, base_level.depth >> mip_level, 1);
+	return size;
+}
+
+static inline void
+svga3dsurface_get_size_in_blocks(const struct svga3d_surface_desc *desc,
+				 const SVGA3dSize *pixel_size,
+				 SVGA3dSize *block_size)
+{
+	block_size->width = DIV_ROUND_UP(pixel_size->width,
+					 desc->block_size.width);
+	block_size->height = DIV_ROUND_UP(pixel_size->height,
+					  desc->block_size.height);
+	block_size->depth = DIV_ROUND_UP(pixel_size->depth,
+					 desc->block_size.depth);
+}
+
+static inline bool
+svga3dsurface_is_planar_surface(const struct svga3d_surface_desc *desc)
+{
+	return (desc->block_desc & SVGA3DBLOCKDESC_PLANAR_YUV) != 0;
+}
+
+static inline uint32
+svga3dsurface_calculate_pitch(const struct svga3d_surface_desc *desc,
+			      const SVGA3dSize *size)
+{
+	uint32 pitch;
+	SVGA3dSize blocks;
+
+	svga3dsurface_get_size_in_blocks(desc, size, &blocks);
+
+	pitch = blocks.width * desc->pitch_bytes_per_block;
+
+	return pitch;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ *
+ * svga3dsurface_get_image_buffer_size --
+ *
+ *      Return the number of bytes of buffer space required to store
+ *      one image of a surface, optionally using the specified pitch.
+ *
+ *      If pitch is zero, it is assumed that rows are tightly packed.
+ *
+ *      This function is overflow-safe. If the result would have
+ *      overflowed, instead we return MAX_UINT32.
+ *
+ * Results:
+ *      Byte count.
+ *
+ * Side effects:
+ *      None.
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+static inline uint32
+svga3dsurface_get_image_buffer_size(const struct svga3d_surface_desc *desc,
+				    const SVGA3dSize *size,
+				    uint32 pitch)
+{
+	SVGA3dSize image_blocks;
+	uint32 slice_size, total_size;
+
+	svga3dsurface_get_size_in_blocks(desc, size, &image_blocks);
+
+	if (svga3dsurface_is_planar_surface(desc)) {
+		total_size = clamped_umul32(image_blocks.width,
+					    image_blocks.height);
+		total_size = clamped_umul32(total_size, image_blocks.depth);
+		total_size = clamped_umul32(total_size, desc->bytes_per_block);
+		return total_size;
+	}
+
+	if (pitch == 0)
+		pitch = svga3dsurface_calculate_pitch(desc, size);
+
+	slice_size = clamped_umul32(image_blocks.height, pitch);
+	total_size = clamped_umul32(slice_size, image_blocks.depth);
+
+	return total_size;
+}
+
+
+static inline uint32
+svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
+                               SVGA3dSize baseLevelSize,
+                               uint32 numMipLevels,
+                               uint32 face,
+                               uint32 mip)
+
+{
+   uint32 offset;
+   uint32 mipChainBytes;
+   uint32 mipChainBytesToLevel;
+   uint32 i;
+   const struct svga3d_surface_desc *desc;
+   SVGA3dSize mipSize;
+   uint32 bytes;
+
+   desc = svga3dsurface_get_desc(format);
+
+   mipChainBytes = 0;
+   mipChainBytesToLevel = 0;
+   for (i = 0; i < numMipLevels; i++) {
+      mipSize = svga3dsurface_get_mip_size(baseLevelSize, i);
+      bytes = svga3dsurface_get_image_buffer_size(desc, &mipSize, 0);
+      mipChainBytes += bytes;
+      if (i < mip) {
+         mipChainBytesToLevel += bytes;
+      }
+   }
+
+   offset = mipChainBytes * face + mipChainBytesToLevel;
+
+   return offset;
+}
+
+
+static inline uint32
+svga3dsurface_get_serialized_size(SVGA3dSurfaceFormat format,
+				  SVGA3dSize base_level_size,
+				  uint32 num_mip_levels,
+				  bool cubemap)
+{
+	const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
+	uint32 total_size = 0;
+	uint32 mip;
+
+	for (mip = 0; mip < num_mip_levels; mip++) {
+		SVGA3dSize size =
+			svga3dsurface_get_mip_size(base_level_size, mip);
+		total_size += svga3dsurface_get_image_buffer_size(desc,
+								  &size, 0);
+	}
+
+	if (cubemap)
+		total_size *= SVGA3D_MAX_SURFACE_FACES;
+
+	return total_size;
+}
+
+
+/**
+ * Compute the offset (in bytes) to a pixel in an image (or volume).
+ * 'width' is the image width in pixels
+ * 'height' is the image height in pixels
+ */
+static inline uint32
+svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
+                               uint32 width, uint32 height,
+                               uint32 x, uint32 y, uint32 z)
+{
+   const struct svga3d_surface_desc *desc = svga3dsurface_get_desc(format);
+   const uint32 bw = desc->block_size.width, bh = desc->block_size.height;
+   const uint32 bd = desc->block_size.depth;
+   const uint32 rowstride = DIV_ROUND_UP(width, bw) * desc->bytes_per_block;
+   const uint32 imgstride = DIV_ROUND_UP(height, bh) * rowstride;
+   const uint32 offset = (z / bd * imgstride +
+                          y / bh * rowstride +
+                          x / bw * desc->bytes_per_block);
+   return offset;
+}
--- a/src/gallium/drivers/svga/include/svga3d_types.h
+++ b/src/gallium/drivers/svga/include/svga3d_types.h
--- a/src/gallium/drivers/svga/include/svga_escape.h
+++ b/src/gallium/drivers/svga/include/svga_escape.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
--- a/src/gallium/drivers/svga/include/svga_overlay.h
+++ b/src/gallium/drivers/svga/include/svga_overlay.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ * Copyright 2007-2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -152,19 +152,17 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
    switch (format) {
    case VMWARE_FOURCC_YV12:
       *height = (*height + 1) & ~1;
-       *size = (*width + 3) & ~3;
+       *size = (*width) * (*height);

       if (pitches) {
-          pitches[0] = *size;
+          pitches[0] = *width;
       }

-       *size *= *height;
-
       if (offsets) {
          offsets[1] = *size;
       }

-       tmp = ((*width >> 1) + 3) & ~3;
+       tmp = *width >> 1;

       if (pitches) {
          pitches[1] = pitches[2] = tmp;
--- a/src/gallium/drivers/svga/include/svga_reg.h
+++ b/src/gallium/drivers/svga/include/svga_reg.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -32,11 +32,26 @@
 #ifndef _SVGA_REG_H_
 #define _SVGA_REG_H_

+#include "svga_types.h"
+
 /*
- * PCI device IDs.
+ * SVGA_REG_ENABLE bit definitions.
 */
-#define PCI_VENDOR_ID_VMWARE            0x15AD
-#define PCI_DEVICE_ID_VMWARE_SVGA2      0x0405
+typedef enum {
+   SVGA_REG_ENABLE_DISABLE = 0,
+   SVGA_REG_ENABLE_ENABLE = (1 << 0),
+   SVGA_REG_ENABLE_HIDE = (1 << 1),
+} SvgaRegEnable;
+
+/*
+ * Arbitrary and meaningless limits. Please ignore these when writing
+ * new drivers.
+ */
+#define SVGA_MAX_WIDTH                  2560
+#define SVGA_MAX_HEIGHT                 1600
+#define SVGA_MAX_BITS_PER_PIXEL         32
+#define SVGA_MAX_DEPTH                  24
+#define SVGA_MAX_DISPLAYS               10

 /*
 * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned
@@ -98,6 +113,8 @@
 #define SVGA_IRQFLAG_ANY_FENCE            0x1    /* Any fence was passed */
 #define SVGA_IRQFLAG_FIFO_PROGRESS        0x2    /* Made forward progress in the FIFO */
 #define SVGA_IRQFLAG_FENCE_GOAL           0x4    /* SVGA_FIFO_FENCE_GOAL reached */
+#define SVGA_IRQFLAG_COMMAND_BUFFER       0x8    /* Command buffer completed */
+#define SVGA_IRQFLAG_ERROR                0x10   /* Error while processing commands */

 /*
 * Registers
@@ -123,6 +140,7 @@ enum {
   SVGA_REG_FB_SIZE = 16,

   /* ID 0 implementation only had the above registers, then the palette */
+   SVGA_REG_ID_0_TOP = 17,

   SVGA_REG_CAPABILITIES = 17,
   SVGA_REG_MEM_START = 18,           /* (Deprecated) */
@@ -158,11 +176,14 @@ enum {
   SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44,

   SVGA_REG_TRACES = 45,            /* Enable trace-based updates even when FIFO is on */
-   SVGA_REG_TOP = 46,               /* Must be 1 more than the last register */
+   SVGA_REG_GMRS_MAX_PAGES = 46,    /* Maximum number of 4KB pages for all GMRs */
+   SVGA_REG_MEMORY_SIZE = 47,       /* Total dedicated device memory excluding FIFO */
+   SVGA_REG_COMMAND_LOW = 48,       /* Lower 32 bits and submits commands */
+   SVGA_REG_COMMAND_HIGH = 49,      /* Upper 32 bits of command buffer PA */
+   SVGA_REG_TOP = 50,               /* Must be 1 more than the last register */

   SVGA_PALETTE_BASE = 1024,        /* Base of SVGA color map */
   /* Next 768 (== 256*3) registers exist for colormap */
-
   SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS
                                    /* Base of scratch registers */
   /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage:
@@ -170,7 +191,6 @@ enum {
      the use of the current SVGA driver. */
 };

-
 /*
 * Guest memory regions (GMRs):
 *
@@ -279,6 +299,163 @@ struct SVGAGuestPtr {
   uint32 offset;
 } SVGAGuestPtr;

+/*
+ * Register based command buffers --
+ *
+ * Provide an SVGA device interface that allows the guest to submit
+ * command buffers to the SVGA device through an SVGA device register.
+ * The metadata for each command buffer is contained in the
+ * SVGACBHeader structure along with the return status codes.
+ *
+ * The SVGA device supports command buffers if
+ * SVGA_CAP_COMMAND_BUFFERS is set in the device caps register.  The
+ * fifo must be enabled for command buffers to be submitted.
+ *
+ * Command buffers are submitted when the guest writing the 64 byte
+ * aligned physical address into the SVGA_REG_COMMAND_LOW and
+ * SVGA_REG_COMMAND_HIGH.  SVGA_REG_COMMAND_HIGH contains the upper 32
+ * bits of the physical address.  SVGA_REG_COMMAND_LOW contains the
+ * lower 32 bits of the physical address, since the command buffer
+ * headers are required to be 64 byte aligned the lower 6 bits are
+ * used for the SVGACBContext value.  Writing to SVGA_REG_COMMAND_LOW
+ * submits the command buffer to the device and queues it for
+ * execution.  The SVGA device supports at least
+ * SVGA_CB_MAX_QUEUED_PER_CONTEXT command buffers that can be queued
+ * per context and if that limit is reached the device will write the
+ * status SVGA_CB_STATUS_QUEUE_FULL to the status value of the command
+ * buffer header synchronously and not raise any IRQs.
+ *
+ * It is invalid to submit a command buffer without a valid physical
+ * address and results are undefined.
+ *
+ * The device guarantees that command buffers of size SVGA_CB_MAX_SIZE
+ * will be supported.  If a larger command buffer is submitted results
+ * are unspecified and the device will either complete the command
+ * buffer or return an error.
+ *
+ * The device guarantees that any individual command in a command
+ * buffer can be up to SVGA_CB_MAX_COMMAND_SIZE in size which is
+ * enough to fit a 64x64 color-cursor definition.  If the command is
+ * too large the device is allowed to process the command or return an
+ * error.
+ *
+ * The device context is a special SVGACBContext that allows for
+ * synchronous register like accesses with the flexibility of
+ * commands.  There is a different command set defined by
+ * SVGADeviceContextCmdId.  The commands in each command buffer is not
+ * allowed to straddle physical pages.
+ */
+
+#define SVGA_CB_MAX_SIZE (512 * 1024)  // 512 KB
+#define SVGA_CB_MAX_QUEUED_PER_CONTEXT 32
+#define SVGA_CB_MAX_COMMAND_SIZE (32 * 1024) // 32 KB
+
+#define SVGA_CB_CONTEXT_MASK 0x3f
+typedef enum {
+   SVGA_CB_CONTEXT_DEVICE = 0x3f,
+   SVGA_CB_CONTEXT_0      = 0x0,
+   SVGA_CB_CONTEXT_MAX    = 0x1,
+} SVGACBContext;
+
+
+typedef enum {
+   /*
+    * The guest is supposed to write SVGA_CB_STATUS_NONE to the status
+    * field before submitting the command buffer header, the host will
+    * change the value when it is done with the command buffer.
+    */
+   SVGA_CB_STATUS_NONE             = 0,
+
+   /*
+    * Written by the host when a command buffer completes successfully.
+    * The device raises an IRQ with SVGA_IRQFLAG_COMMAND_BUFFER unless
+    * the SVGA_CB_FLAG_NO_IRQ flag is set.
+    */
+   SVGA_CB_STATUS_COMPLETED        = 1,
+
+   /*
+    * Written by the host synchronously with the command buffer
+    * submission to indicate the command buffer was not submitted.  No
+    * IRQ is raised.
+    */
+   SVGA_CB_STATUS_QUEUE_FULL       = 2,
+
+   /*
+    * Written by the host when an error was detected parsing a command
+    * in the command buffer, errorOffset is written to contain the
+    * offset to the first byte of the failing command.  The device
+    * raises the IRQ with both SVGA_IRQFLAG_ERROR and
+    * SVGA_IRQFLAG_COMMAND_BUFFER.  Some of the commands may have been
+    * processed.
+    */
+   SVGA_CB_STATUS_COMMAND_ERROR    = 3,
+
+   /*
+    * Written by the host if there is an error parsing the command
+    * buffer header.  The device raises the IRQ with both
+    * SVGA_IRQFLAG_ERROR and SVGA_IRQFLAG_COMMAND_BUFFER.  The device
+    * did not processes any of the command buffer.
+    */
+   SVGA_CB_STATUS_CB_HEADER_ERROR  = 4,
+
+   /*
+    * Written by the host if the guest requested the host to preempt
+    * the command buffer.  The device will not raise any IRQs and the
+    * command buffer was not processed.
+    */
+   SVGA_CB_STATUS_PREEMPTED        = 5,
+} SVGACBStatus;
+
+typedef enum {
+   SVGA_CB_FLAG_NONE     = 0,
+   SVGA_CB_FLAG_NO_IRQ   = 1 << 0,
+} SVGACBFlags;
+
+typedef
+struct {
+   volatile SVGACBStatus status;
+   volatile uint32 errorOffset;
+   uint64 id;
+   SVGACBFlags flags;
+   uint32 length;
+   union {
+      PA pa;
+   } ptr;
+   uint32 mustBeZero[8];
+} SVGACBHeader;
+
+typedef enum {
+   SVGA_DC_CMD_NOP                   = 0,
+   SVGA_DC_CMD_START_STOP_CONTEXT    = 1,
+   SVGA_DC_CMD_PREEMPT               = 2,
+   SVGA_DC_CMD_MAX                   = 3,
+   SVGA_DC_CMD_FORCE_UINT            = MAX_UINT32,
+} SVGADeviceContextCmdId;
+
+typedef struct {
+   uint32 enable;
+   SVGACBContext context;
+} SVGADCCmdStartStop;
+
+/*
+ * SVGADCCmdPreempt --
+ *
+ * This command allows the guest to request that all command buffers
+ * on the specified context be preempted that can be.  After execution
+ * of this command all command buffers that were preempted will
+ * already have SVGA_CB_STATUS_PREEMPTED written into the status
+ * field.  The device might still be processing a command buffer,
+ * assuming execution of it started before the preemption request was
+ * received.  Specifying the ignoreIDZero flag to TRUE will cause the
+ * device to not preempt command buffers with the id field in the
+ * command buffer header set to zero.
+ */
+
+typedef struct {
+   SVGACBContext context;
+   uint32 ignoreIDZero;
+} SVGADCCmdPreempt;
+

 /*
 * SVGAGMRImageFormat --
@@ -300,8 +477,7 @@ struct SVGAGuestPtr {
 *
 */

-typedef
-struct SVGAGMRImageFormat {
+typedef struct SVGAGMRImageFormat {
   union {
      struct {
         uint32 bitsPerPixel : 8;
@@ -313,6 +489,28 @@ struct SVGAGMRImageFormat {
   };
 } SVGAGMRImageFormat;

+typedef
+struct SVGAGuestImage {
+   SVGAGuestPtr         ptr;
+
+   /*
+    * A note on interpretation of pitch: This value of pitch is the
+    * number of bytes between vertically adjacent image
+    * blocks. Normally this is the number of bytes between the first
+    * pixel of two adjacent scanlines. With compressed textures,
+    * however, this may represent the number of bytes between
+    * compression blocks rather than between rows of pixels.
+    *
+    * XXX: Compressed textures currently must be tightly packed in guest memory.
+    *
+    * If the image is 1-dimensional, pitch is ignored.
+    *
+    * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+    * assuming each row of blocks is tightly packed.
+    */
+   uint32 pitch;
+} SVGAGuestImage;
+
 /*
 * SVGAColorBGRX --
 *
@@ -321,8 +519,7 @@ struct SVGAGMRImageFormat {
 *    GMRFB state.
 */

-typedef
-struct SVGAColorBGRX {
+typedef struct SVGAColorBGRX {
   union {
      struct {
         uint32 b : 8;
@@ -350,7 +547,7 @@ struct SVGAColorBGRX {
 */

 typedef
-struct SVGASignedRect {
+struct {
   int32  left;
   int32  top;
   int32  right;
@@ -358,18 +555,51 @@ struct SVGASignedRect {
 } SVGASignedRect;

 typedef
-struct SVGASignedPoint {
+struct {
   int32  x;
   int32  y;
 } SVGASignedPoint;


 /*
- *  Capabilities
+ * SVGA Device Capabilities
+ *
+ * Note the holes in the bitfield. Missing bits have been deprecated,
+ * and must not be reused. Those capabilities will never be reported
+ * by new versions of the SVGA device.
+ *
+ * XXX: Add longer descriptions for each capability, including a list
+ *      of the new features that each capability provides.
+ *
+ * SVGA_CAP_IRQMASK --
+ *    Provides device interrupts.  Adds device register SVGA_REG_IRQMASK
+ *    to set interrupt mask and direct I/O port SVGA_IRQSTATUS_PORT to
+ *    set/clear pending interrupts.
+ *
+ * SVGA_CAP_GMR --
+ *    Provides synchronous mapping of guest memory regions (GMR).
+ *    Adds device registers SVGA_REG_GMR_ID, SVGA_REG_GMR_DESCRIPTOR,
+ *    SVGA_REG_GMR_MAX_IDS, and SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH.
+ *
+ * SVGA_CAP_TRACES --
+ *    Allows framebuffer trace-based updates even when FIFO is enabled.
+ *    Adds device register SVGA_REG_TRACES.
+ *
+ * SVGA_CAP_GMR2 --
+ *    Provides asynchronous commands to define and remap guest memory
+ *    regions.  Adds device registers SVGA_REG_GMRS_MAX_PAGES and
+ *    SVGA_REG_MEMORY_SIZE.
+ *
+ * SVGA_CAP_SCREEN_OBJECT_2 --
+ *    Allow screen object support, and require backing stores from the
+ *    guest for each screen object.
+ *
+ * SVGA_CAP_COMMAND_BUFFERS --
+ *    Enable register based command buffer submission.
+ *
+ * SVGA_CAP_GBOBJECTS --
+ *    Enable guest-backed objects and surfaces.
 *
- *  Note the holes in the bitfield. Missing bits have been deprecated,
- *  and must not be reused. Those capabilities will never be reported
- *  by new versions of the SVGA device.
 */

 #define SVGA_CAP_NONE               0x00000000
@@ -387,6 +617,35 @@ struct SVGASignedPoint {
 #define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
+#define SVGA_CAP_GMR2               0x00400000
+#define SVGA_CAP_SCREEN_OBJECT_2    0x00800000
+#define SVGA_CAP_COMMAND_BUFFERS    0x01000000
+#define SVGA_CAP_DEAD1              0x02000000
+#define SVGA_CAP_CMD_BUFFERS_2      0x04000000
+#define SVGA_CAP_GBOBJECTS          0x08000000
+
+
+/*
+ * The Guest can optionally read some SVGA device capabilities through
+ * the backdoor with command BDOOR_CMD_GET_SVGA_CAPABILITIES before
+ * the SVGA device is initialized.  The type of capability the guest
+ * is requesting from the SVGABackdoorCapType enum should be placed in
+ * the upper 16 bits of the backdoor command id (ECX).  On success the
+ * the value of EBX will be set to BDOOR_MAGIC and EAX will be set to
+ * the requested capability.  If the command is not supported then EBX
+ * will be left unchanged and EAX will be set to -1.  Because it is
+ * possible that -1 is the value of the requested cap the correct way
+ * to check if the command was successful is to check if EBX was changed
+ * to BDOOR_MAGIC making sure to initialize the register to something
+ * else first.
+ */
+
+typedef enum {
+   SVGABackdoorCapDeviceCaps = 0,
+   SVGABackdoorCapFifoCaps = 1,
+   SVGABackdoorCap3dHWVersion = 2,
+   SVGABackdoorCapMax = 3,
+} SVGABackdoorCapType;


 /*
@@ -443,7 +702,7 @@ enum {
    * These in block 3a, the VMX currently considers mandatory for the
    * extended FIFO.
    */
-
+   
   // Valid if exists (i.e. if extended FIFO enabled):
   SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
@@ -460,16 +719,30 @@ enum {
   SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */

   /*
-    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2:
    *
    * By default this is SVGA_ID_INVALID, to indicate that the cursor
    * coordinates are specified relative to the virtual root. If this
    * is set to a specific screen ID, cursor position is reinterpreted
-    * as a signed offset relative to that screen's origin. This is the
-    * only way to place the cursor on a non-rooted screen.
+    * as a signed offset relative to that screen's origin.
    */
   SVGA_FIFO_CURSOR_SCREEN_ID,

+   /*
+    * Valid with SVGA_FIFO_CAP_DEAD
+    *
+    * An arbitrary value written by the host, drivers should not use it.
+    */
+   SVGA_FIFO_DEAD,
+
+   /*
+    * Valid with SVGA_FIFO_CAP_3D_HWVERSION_REVISED:
+    *
+    * Contains 3D HWVERSION (see SVGA3dHardwareVersion in svga3d_reg.h)
+    * on platforms that can enforce graphics resource limits.
+    */
+   SVGA_FIFO_3D_HWVERSION_REVISED,
+
   /*
    * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
    * registers, but this must be done carefully and with judicious use of
@@ -709,6 +982,37 @@ enum {
 *
 *       - When a screen is resized, either using Screen Object commands or
 *         legacy multimon registers, its contents are preserved.
+ *
+ * SVGA_FIFO_CAP_GMR2 --
+ *
+ *    Provides new commands to define and remap guest memory regions (GMR).
+ *
+ *    New 2D commands:
+ *       DEFINE_GMR2, REMAP_GMR2.
+ *
+ * SVGA_FIFO_CAP_3D_HWVERSION_REVISED --
+ *
+ *    Indicates new register SVGA_FIFO_3D_HWVERSION_REVISED exists.
+ *    This register may replace SVGA_FIFO_3D_HWVERSION on platforms
+ *    that enforce graphics resource limits.  This allows the platform
+ *    to clear SVGA_FIFO_3D_HWVERSION and disable 3D in legacy guest
+ *    drivers that do not limit their resources.
+ *
+ *    Note this is an alias to SVGA_FIFO_CAP_GMR2 because these indicators
+ *    are codependent (and thus we use a single capability bit).
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT_2 --
+ *
+ *    Modifies the DEFINE_SCREEN command to include a guest provided
+ *    backing store in GMR memory and the bytesPerLine for the backing
+ *    store.  This capability requires the use of a backing store when
+ *    creating screen objects.  However if SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    is present then backing stores are optional.
+ *
+ * SVGA_FIFO_CAP_DEAD --
+ *
+ *    Drivers should not use this cap bit.  This cap bit can not be
+ *    reused since some hosts already expose it.
 */

 #define SVGA_FIFO_CAP_NONE                  0
@@ -720,6 +1024,10 @@ enum {
 #define SVGA_FIFO_CAP_ESCAPE            (1<<5)
 #define SVGA_FIFO_CAP_RESERVE           (1<<6)
 #define SVGA_FIFO_CAP_SCREEN_OBJECT     (1<<7)
+#define SVGA_FIFO_CAP_GMR2              (1<<8)
+#define SVGA_FIFO_CAP_3D_HWVERSION_REVISED  SVGA_FIFO_CAP_GMR2
+#define SVGA_FIFO_CAP_SCREEN_OBJECT_2   (1<<9)
+#define SVGA_FIFO_CAP_DEAD              (1<<10)


 /*
@@ -814,6 +1122,24 @@ typedef struct SVGAOverlayUnit {
 } SVGAOverlayUnit;


+/*
+ * Guest display topology
+ *
+ * XXX: This structure is not part of the SVGA device's interface, and
+ * doesn't really belong here.
+ */
+#define SVGA_INVALID_DISPLAY_ID ((uint32)-1)
+
+typedef struct SVGADisplayTopology {
+   uint16 displayId;
+   uint16 isPrimary;
+   uint32 width;
+   uint32 height;
+   uint32 positionX;
+   uint32 positionY;
+} SVGADisplayTopology;
+
+
 /*
 * SVGAScreenObject --
 *
@@ -829,14 +1155,50 @@ typedef struct SVGAOverlayUnit {
 *    compatibility. New flags can be added, and the struct may grow,
 *    but existing fields must retain their meaning.
 *
+ *    Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2 are required fields of
+ *    a SVGAGuestPtr that is used to back the screen contents.  This
+ *    memory must come from the GFB.  The guest is not allowed to
+ *    access the memory and doing so will have undefined results.  The
+ *    backing store is required to be page aligned and the size is
+ *    padded to the next page boundry.  The number of pages is:
+ *       (bytesPerLine * size.width * 4 + PAGE_SIZE - 1) / PAGE_SIZE
+ *
+ *    The pitch in the backingStore is required to be at least large
+ *    enough to hold a 32bbp scanline.  It is recommended that the
+ *    driver pad bytesPerLine for a potential performance win.
+ *
+ *    The cloneCount field is treated as a hint from the guest that
+ *    the user wants this display to be cloned, countCount times.  A
+ *    value of zero means no cloning should happen.
 */

-#define SVGA_SCREEN_HAS_ROOT    (1 << 0)  // Screen is present in the virtual coord space
-#define SVGA_SCREEN_IS_PRIMARY  (1 << 1)  // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)   // Guest is running a fullscreen app here
+#define SVGA_SCREEN_MUST_BE_SET     (1 << 0) // Must be set or results undefined
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET // Deprecated
+#define SVGA_SCREEN_IS_PRIMARY      (1 << 1) // Guest considers this screen to be 'primary'
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)  // Guest is running a fullscreen app here
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When the screen is
+ * deactivated the base layer is defined to lose all contents and
+ * become black.  When a screen is deactivated the backing store is
+ * optional.  When set backingPtr and bytesPerLine will be ignored.
+ */
+#define SVGA_SCREEN_DEACTIVATE  (1 << 3)
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When this flag is set
+ * the screen contents will be outputted as all black to the user
+ * though the base layer contents is preserved.  The screen base layer
+ * can still be read and written to like normal though the no visible
+ * effect will be seen by the user.  When the flag is changed the
+ * screen will be blanked or redrawn to the current contents as needed
+ * without any extra commands from the driver.  This flag only has an
+ * effect when the screen is not deactivated.
+ */
+#define SVGA_SCREEN_BLANKING (1 << 4)

 typedef
-struct SVGAScreenObject {
+struct {
   uint32 structSize;   // sizeof(SVGAScreenObject)
   uint32 id;
   uint32 flags;
@@ -847,7 +1209,14 @@ struct SVGAScreenObject {
   struct {
      int32 x;
      int32 y;
-   } root;              // Only used if SVGA_SCREEN_HAS_ROOT is set.
+   } root;
+
+   /*
+    * Added and required by SVGA_FIFO_CAP_SCREEN_OBJECT_2, optional
+    * with SVGA_FIFO_CAP_SCREEN_OBJECT.
+    */
+   SVGAGuestImage backingStore;
+   uint32 cloneCount;
 } SVGAScreenObject;


@@ -872,6 +1241,7 @@ typedef enum {
   SVGA_CMD_INVALID_CMD           = 0,
   SVGA_CMD_UPDATE                = 1,
   SVGA_CMD_RECT_COPY             = 3,
+   SVGA_CMD_RECT_ROP_COPY         = 14,
   SVGA_CMD_DEFINE_CURSOR         = 19,
   SVGA_CMD_DEFINE_ALPHA_CURSOR   = 22,
   SVGA_CMD_UPDATE_VERBOSE        = 25,
@@ -885,10 +1255,16 @@ typedef enum {
   SVGA_CMD_BLIT_SCREEN_TO_GMRFB  = 38,
   SVGA_CMD_ANNOTATION_FILL       = 39,
   SVGA_CMD_ANNOTATION_COPY       = 40,
+   SVGA_CMD_DEFINE_GMR2           = 41,
+   SVGA_CMD_REMAP_GMR2            = 42,
+   SVGA_CMD_DEAD                  = 43,
+   SVGA_CMD_DEAD_2                = 44,
   SVGA_CMD_MAX
 } SVGAFifoCmdId;

+#define SVGA_CMD_MAX_DATASIZE       (256 * 1024)
 #define SVGA_CMD_MAX_ARGS           64
+#define SVGA_CB_MAX_COMMAND_SIZE (32 * 1024) // 32 KB


 /*
@@ -949,6 +1325,30 @@ struct {
 } SVGAFifoCmdRectCopy;


+/*
+ * SVGA_CMD_RECT_ROP_COPY --
+ *
+ *    Perform a rectangular DMA transfer from one area of the GFB to
+ *    another, and copy the result to any screens which intersect it.
+ *    The value of ROP may only be SVGA_ROP_COPY, and this command is
+ *    only supported for backwards compatibility reasons.
+ *
+ * Availability:
+ *    SVGA_CAP_RECT_COPY
+ */
+
+typedef
+struct {
+   uint32 srcX;
+   uint32 srcY;
+   uint32 destX;
+   uint32 destY;
+   uint32 width;
+   uint32 height;
+   uint32 rop;
+} SVGAFifoCmdRectRopCopy;
+
+
 /*
 * SVGA_CMD_DEFINE_CURSOR --
 *
@@ -1113,7 +1513,7 @@ struct {
 *    registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1129,7 +1529,7 @@ struct {
 *    re-use.
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1182,7 +1582,7 @@ struct {
 *    GMRFB.
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1219,7 +1619,7 @@ struct {
 *    SVGA_CMD_ANNOTATION_* commands for details.
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1267,7 +1667,7 @@ struct {
 *    the time any subsequent FENCE commands are reached.
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1302,7 +1702,7 @@ struct {
 *    user's display is being remoted over a network connection.
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1334,7 +1734,7 @@ struct {
 *    undefined.
 *
 * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
 */

 typedef
@@ -1343,4 +1743,113 @@ struct {
   uint32           srcScreenId;
 } SVGAFifoCmdAnnotationCopy;

+
+/*
+ * SVGA_CMD_DEFINE_GMR2 --
+ *
+ *    Define guest memory region v2.  See the description of GMRs above.
+ *
+ * Availability:
+ *    SVGA_CAP_GMR2
+ */
+
+typedef
+struct {
+   uint32 gmrId;
+   uint32 numPages;
+} SVGAFifoCmdDefineGMR2;
+
+
+/*
+ * SVGA_CMD_REMAP_GMR2 --
+ *
+ *    Remap guest memory region v2.  See the description of GMRs above.
+ *
+ *    This command allows guest to modify a portion of an existing GMR by
+ *    invalidating it or reassigning it to different guest physical pages.
+ *    The pages are identified by physical page number (PPN).  The pages
+ *    are assumed to be pinned and valid for DMA operations.
+ *
+ *    Description of command flags:
+ *
+ *    SVGA_REMAP_GMR2_VIA_GMR: If enabled, references a PPN list in a GMR.
+ *       The PPN list must not overlap with the remap region (this can be
+ *       handled trivially by referencing a separate GMR).  If flag is
+ *       disabled, PPN list is appended to SVGARemapGMR command.
+ *
+ *    SVGA_REMAP_GMR2_PPN64: If set, PPN list is in PPN64 format, otherwise
+ *       it is in PPN32 format.
+ *
+ *    SVGA_REMAP_GMR2_SINGLE_PPN: If set, PPN list contains a single entry.
+ *       A single PPN can be used to invalidate a portion of a GMR or
+ *       map it to to a single guest scratch page.
+ *
+ * Availability:
+ *    SVGA_CAP_GMR2
+ */
+
+typedef enum {
+   SVGA_REMAP_GMR2_PPN32         = 0,
+   SVGA_REMAP_GMR2_VIA_GMR       = (1 << 0),
+   SVGA_REMAP_GMR2_PPN64         = (1 << 1),
+   SVGA_REMAP_GMR2_SINGLE_PPN    = (1 << 2),
+} SVGARemapGMR2Flags;
+
+typedef
+struct {
+   uint32 gmrId;
+   SVGARemapGMR2Flags flags;
+   uint32 offsetPages; // offset in pages to begin remap
+   uint32 numPages; // number of pages to remap
+   /*
+    * Followed by additional data depending on SVGARemapGMR2Flags.
+    *
+    * If flag SVGA_REMAP_GMR2_VIA_GMR is set, single SVGAGuestPtr follows.
+    * Otherwise an array of page descriptors in PPN32 or PPN64 format
+    * (according to flag SVGA_REMAP_GMR2_PPN64) follows.  If flag
+    * SVGA_REMAP_GMR2_SINGLE_PPN is set, array contains a single entry.
+    */
+} SVGAFifoCmdRemapGMR2;
+
+
+/*
+ * Size of SVGA device memory such as frame buffer and FIFO.
+ */
+#define SVGA_VRAM_MIN_SIZE             (4 * 640 * 480) // bytes
+#define SVGA_VRAM_MIN_SIZE_3D       (16 * 1024 * 1024)
+#define SVGA_VRAM_MAX_SIZE         (128 * 1024 * 1024)
+#define SVGA_MEMORY_SIZE_MAX      (1024 * 1024 * 1024)
+#define SVGA_FIFO_SIZE_MAX           (2 * 1024 * 1024)
+#define SVGA_GRAPHICS_MEMORY_KB_MIN       (32 * 1024)
+#define SVGA_GRAPHICS_MEMORY_KB_MAX       (2 * 1024 * 1024)
+#define SVGA_GRAPHICS_MEMORY_KB_DEFAULT   (256 * 1024)
+
+#define SVGA_VRAM_SIZE_W2K          (64 * 1024 * 1024) // 64 MB
+
+/*
+ * To simplify autoDetect display configuration, support a minimum of
+ * two 1920x1200 monitors, 32bpp, side-by-side, optionally rotated:
+ *   numDisplays = 2
+ *   maxWidth = numDisplay * 1920 = 3840
+ *   maxHeight = rotated width of single monitor = 1920
+ *   vramSize = maxWidth * maxHeight * 4 = 29491200
+ */
+#define SVGA_VRAM_SIZE_AUTODETECT   (32 * 1024 * 1024)
+
+#if defined(VMX86_SERVER)
+#define SVGA_VRAM_SIZE               (4 * 1024 * 1024)
+#define SVGA_VRAM_SIZE_3D           (64 * 1024 * 1024)
+#define SVGA_FIFO_SIZE                    (256 * 1024)
+#define SVGA_FIFO_SIZE_3D                 (516 * 1024) // Bump to 516KB to workaround WDDM driver issue (see bug# 744318)
+#define SVGA_MEMORY_SIZE_DEFAULT   (160 * 1024 * 1024)
+#define SVGA_AUTODETECT_DEFAULT                  FALSE
+#else
+#define SVGA_VRAM_SIZE              (16 * 1024 * 1024)
+#define SVGA_VRAM_SIZE_3D           SVGA_VRAM_MAX_SIZE
+#define SVGA_FIFO_SIZE               (2 * 1024 * 1024)
+#define SVGA_FIFO_SIZE_3D               SVGA_FIFO_SIZE
+#define SVGA_MEMORY_SIZE_DEFAULT   (768 * 1024 * 1024)
+#define SVGA_AUTODETECT_DEFAULT                   TRUE
+#endif
+
 #endif
--- a/src/gallium/drivers/svga/include/svga_types.h
+++ b/src/gallium/drivers/svga/include/svga_types.h
@@ -1,5 +1,5 @@
 /**********************************************************
- * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ * Copyright 1998-2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
@@ -46,5 +46,12 @@ typedef uint8_t uint8;

 typedef uint8_t Bool;

+typedef uint64    PA;
+typedef uint32    PPN;
+typedef uint64    PPN64;
+
+#undef MAX_UINT32
+#define MAX_UINT32 0xffffffffU
+
 #endif /* _SVGA_TYPES_H_ */

--- a/src/gallium/drivers/svga/include/vmware_pack_begin.h
+++ b/src/gallium/drivers/svga/include/vmware_pack_begin.h
@@ -0,0 +1 @@
+/* dummy file */
--- a/src/gallium/drivers/svga/include/vmware_pack_end.h
+++ b/src/gallium/drivers/svga/include/vmware_pack_end.h
@@ -0,0 +1 @@
+/* dummy file */
--- a/src/gallium/drivers/svga/svga_cmd.c
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -45,6 +45,9 @@
 *      Can handle null surface. Does a surface_reallocation so you need
 *      to have allocated the fifo space before converting.
 *
+ *
+ * param flags  mask of SVGA_RELOC_READ / _WRITE
+ *
 * Results:
 *      id is filled out.
 *
@@ -62,12 +65,12 @@ surface_to_surfaceid(struct svga_winsys_context *swc, // IN
 {
   if (surface) {
      struct svga_surface *s = svga_surface(surface);
-      swc->surface_relocation(swc, &id->sid, s->handle, flags);
+      swc->surface_relocation(swc, &id->sid, NULL, s->handle, flags);
      id->face = s->real_face; /* faces have the same order */
      id->mipmap = s->real_level;
   }
   else {
-      swc->surface_relocation(swc, &id->sid, NULL, flags);
+      swc->surface_relocation(swc, &id->sid, NULL, NULL, flags);
      id->face = 0;
      id->mipmap = 0;
   }
@@ -280,7 +283,8 @@ SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
   if (!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;

-   swc->surface_relocation(swc, &cmd->sid, sid, SVGA_RELOC_WRITE);
+   swc->surface_relocation(swc, &cmd->sid, NULL, sid,
+                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
   cmd->surfaceFlags = flags;
   cmd->format = format;

@@ -365,8 +369,9 @@ SVGA3D_DestroySurface(struct svga_winsys_context *swc,
                            SVGA_3D_CMD_SURFACE_DESTROY, sizeof *cmd, 1);
   if (!cmd)
      return PIPE_ERROR_OUT_OF_MEMORY;
-
-   swc->surface_relocation(swc, &cmd->sid, sid, SVGA_RELOC_READ);
+   
+   swc->surface_relocation(swc, &cmd->sid, NULL, sid,
+                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
   swc->commit(swc);;

   return PIPE_OK;
@@ -453,7 +458,8 @@ SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
   swc->region_relocation(swc, &cmd->guest.ptr, st->hwbuf, 0, region_flags);
   cmd->guest.pitch = st->base.stride;

-   swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags);
+   swc->surface_relocation(swc, &cmd->host.sid, NULL,
+                           texture->handle, surface_flags);
   cmd->host.face = st->face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
   cmd->host.mipmap = st->base.level;

@@ -487,6 +493,8 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc,
   SVGA3dCmdSurfaceDMASuffix *pSuffix;
   unsigned region_flags;
   unsigned surface_flags;
+   
+   assert(!swc->have_gb_objects);

   if (transfer == SVGA3D_WRITE_HOST_VRAM) {
      region_flags = SVGA_RELOC_READ;
@@ -511,7 +519,8 @@ SVGA3D_BufferDMA(struct svga_winsys_context *swc,
   swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
   cmd->guest.pitch = 0;

-   swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+   swc->surface_relocation(swc, &cmd->host.sid,
+                           NULL, host, surface_flags);
   cmd->host.face = 0;
   cmd->host.mipmap = 0;

@@ -1347,6 +1356,44 @@ SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
 }


+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginGBQuery--
+ *
+ *      GB resource version of SVGA3D_BeginQuery.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static enum pipe_error
+SVGA3D_BeginGBQuery(struct svga_winsys_context *swc,
+		    SVGA3dQueryType type) // IN
+{
+   SVGA3dCmdBeginGBQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_BEGIN_GB_QUERY,
+                            sizeof *cmd,
+                            1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+   cmd->type = type;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
 /*
 *----------------------------------------------------------------------
 *
@@ -1369,6 +1416,9 @@ SVGA3D_BeginQuery(struct svga_winsys_context *swc,
 {
   SVGA3dCmdBeginQuery *cmd;

+   if (swc->have_gb_objects)
+      return SVGA3D_BeginGBQuery(swc, type);
+
   cmd = SVGA3D_FIFOReserve(swc,
                            SVGA_3D_CMD_BEGIN_QUERY,
                            sizeof *cmd,
@@ -1385,6 +1435,48 @@ SVGA3D_BeginQuery(struct svga_winsys_context *swc,
 }


+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_EndGBQuery--
+ *
+ *      GB resource version of SVGA3D_EndQuery.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static enum pipe_error
+SVGA3D_EndGBQuery(struct svga_winsys_context *swc,
+		  SVGA3dQueryType type,              // IN
+		  struct svga_winsys_buffer *buffer) // IN/OUT
+{
+   SVGA3dCmdEndGBQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_END_GB_QUERY,
+                            sizeof *cmd,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+   cmd->type = type;
+
+   swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
+		       0, SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
+
+
 /*
 *----------------------------------------------------------------------
 *
@@ -1408,6 +1500,9 @@ SVGA3D_EndQuery(struct svga_winsys_context *swc,
 {
   SVGA3dCmdEndQuery *cmd;

+   if (swc->have_gb_objects)
+      return SVGA3D_EndGBQuery(swc, type, buffer);
+
   cmd = SVGA3D_FIFOReserve(swc,
                            SVGA_3D_CMD_END_QUERY,
                            sizeof *cmd,
@@ -1419,7 +1514,49 @@ SVGA3D_EndQuery(struct svga_winsys_context *swc,
   cmd->type = type;

   swc->region_relocation(swc, &cmd->guestResult, buffer, 0,
-                          SVGA_RELOC_WRITE);
+                          SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_WaitForGBQuery--
+ *
+ *      GB resource version of SVGA3D_WaitForQuery.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static enum pipe_error
+SVGA3D_WaitForGBQuery(struct svga_winsys_context *swc,
+		      SVGA3dQueryType type,              // IN
+		      struct svga_winsys_buffer *buffer) // IN/OUT
+{
+   SVGA3dCmdWaitForGBQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_WAIT_FOR_GB_QUERY,
+                            sizeof *cmd,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+   cmd->type = type;
+
+   swc->mob_relocation(swc, &cmd->mobid, &cmd->offset, buffer,
+		       0, SVGA_RELOC_READ | SVGA_RELOC_WRITE);

   swc->commit(swc);

@@ -1455,6 +1592,9 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
 {
   SVGA3dCmdWaitForQuery *cmd;

+   if (swc->have_gb_objects)
+      return SVGA3D_WaitForGBQuery(swc, type, buffer);
+
   cmd = SVGA3D_FIFOReserve(swc,
                            SVGA_3D_CMD_WAIT_FOR_QUERY,
                            sizeof *cmd,
@@ -1466,7 +1606,415 @@ SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
   cmd->type = type;

   swc->region_relocation(swc, &cmd->guestResult, buffer, 0,
-                          SVGA_RELOC_WRITE);
+                          SVGA_RELOC_READ | SVGA_RELOC_WRITE);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
+                      struct svga_winsys_gb_shader *gbshader,
+                      SVGA3dShaderType type,
+                      uint32 sizeInBytes)
+{
+   SVGA3dCmdDefineGBShader *cmd;
+
+   assert(sizeInBytes % 4 == 0);
+   assert(type == SVGA3D_SHADERTYPE_VS ||
+          type == SVGA3D_SHADERTYPE_PS);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DEFINE_GB_SHADER,
+                            sizeof *cmd,
+                            1); /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+   cmd->type = type;
+   cmd->sizeInBytes = sizeInBytes;
+
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_BindGBShader(struct svga_winsys_context *swc,
+                    struct svga_winsys_gb_shader *gbshader)
+{
+   SVGA3dCmdBindGBShader *cmd = 
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_BIND_GB_SHADER,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->shader_relocation(swc, &cmd->shid, &cmd->mobid,
+			  &cmd->offsetInBytes, gbshader);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_SetGBShader(struct svga_winsys_context *swc,
+                   SVGA3dShaderType type,  // IN
+                   struct svga_winsys_gb_shader *gbshader)
+{
+   SVGA3dCmdSetShader *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SET_SHADER,
+                            sizeof *cmd,
+                            2);  /* two relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   
+   swc->context_relocation(swc, &cmd->cid);
+   cmd->type = type;
+   if (gbshader)
+      swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+   else
+      cmd->shid = SVGA_ID_INVALID;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
+                       struct svga_winsys_gb_shader *gbshader)
+{
+   SVGA3dCmdDestroyGBShader *cmd = 
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_DESTROY_GB_SHADER,
+                         sizeof *cmd,
+                         1); /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->shader_relocation(swc, &cmd->shid, NULL, NULL, gbshader);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * \param flags  mask of SVGA_RELOC_READ / _WRITE
+ */
+enum pipe_error
+SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
+                     struct svga_winsys_surface *surface)
+{
+   SVGA3dCmdBindGBSurface *cmd = 
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_BIND_GB_SURFACE,
+                         sizeof *cmd,
+                         2);  /* two relocations */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, &cmd->mobid, surface,
+                           SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_DefineGBContext(struct svga_winsys_context *swc)
+{
+   SVGA3dCmdDefineGBContext *cmd = 
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_DEFINE_GB_CONTEXT,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_DestroyGBContext(struct svga_winsys_context *swc)
+{
+   SVGA3dCmdDestroyGBContext *cmd = 
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_DESTROY_GB_CONTEXT,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_BindGBContext(struct svga_winsys_context *swc)
+{
+   SVGA3dCmdBindGBContext *cmd = 
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_BIND_GB_CONTEXT,
+                         sizeof *cmd,
+                         2);  /* two relocations */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+   swc->context_relocation(swc, &cmd->mobid);
+   cmd->validContents = 0;  /* XXX pass as a parameter? */
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc)
+{
+   SVGA3dCmdInvalidateGBContext *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_INVALIDATE_GB_CONTEXT,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->context_relocation(swc, &cmd->cid);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+
+/**
+ * Update an image in a guest-backed surface.
+ * (Inform the device that the guest-contents have been updated.)
+ */
+enum pipe_error
+SVGA3D_UpdateGBImage(struct svga_winsys_context *swc,
+                     struct svga_winsys_surface *surface,
+                     const SVGA3dBox *box,
+                     unsigned face, unsigned mipLevel)
+
+{
+   SVGA3dCmdUpdateGBImage *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_UPDATE_GB_IMAGE,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->image.sid, NULL, surface,
+                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
+   cmd->image.face = face;
+   cmd->image.mipmap = mipLevel;
+   cmd->box = *box;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Update an entire guest-backed surface.
+ * (Inform the device that the guest-contents have been updated.)
+ */
+enum pipe_error
+SVGA3D_UpdateGBSurface(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *surface)
+{
+   SVGA3dCmdUpdateGBSurface *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_UPDATE_GB_SURFACE,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Readback an image in a guest-backed surface.
+ * (Request the device to flush the dirty contents into the guest.)
+ */
+enum pipe_error
+SVGA3D_ReadbackGBImage(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *surface,
+                       unsigned face, unsigned mipLevel)
+{
+   SVGA3dCmdReadbackGBImage *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_READBACK_GB_IMAGE,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->image.sid, NULL, surface,
+                           SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
+   cmd->image.face = face;
+   cmd->image.mipmap = mipLevel;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Readback an entire guest-backed surface.
+ * (Request the device to flush the dirty contents into the guest.)
+ */
+enum pipe_error
+SVGA3D_ReadbackGBSurface(struct svga_winsys_context *swc,
+                         struct svga_winsys_surface *surface)
+{
+   SVGA3dCmdReadbackGBSurface *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_READBACK_GB_SURFACE,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, NULL, surface,
+                           SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_ReadbackGBImagePartial(struct svga_winsys_context *swc,
+                              struct svga_winsys_surface *surface,
+                              unsigned face, unsigned mipLevel,
+                              const SVGA3dBox *box,
+                              bool invertBox)
+{
+   SVGA3dCmdReadbackGBImagePartial *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_READBACK_GB_IMAGE_PARTIAL,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->image.sid, NULL, surface,
+                           SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
+   cmd->image.face = face;
+   cmd->image.mipmap = mipLevel;
+   cmd->box = *box;
+   cmd->invertBox = invertBox;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_InvalidateGBImagePartial(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                unsigned face, unsigned mipLevel,
+                                const SVGA3dBox *box,
+                                bool invertBox)
+{
+   SVGA3dCmdInvalidateGBImagePartial *cmd =
+      SVGA3D_FIFOReserve(swc,
+                         SVGA_3D_CMD_INVALIDATE_GB_IMAGE_PARTIAL,
+                         sizeof *cmd,
+                         1);  /* one relocation */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->image.sid, NULL, surface,
+                           SVGA_RELOC_READ | SVGA_RELOC_INTERNAL);
+   cmd->image.face = face;
+   cmd->image.mipmap = mipLevel;
+   cmd->box = *box;
+   cmd->invertBox = invertBox;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_SetGBShaderConstsInline(struct svga_winsys_context *swc,
+                              unsigned regStart,
+                              unsigned numRegs,
+                              SVGA3dShaderType shaderType,
+                              SVGA3dShaderConstType constType,
+                              const void *values)
+{
+   SVGA3dCmdSetGBShaderConstInline *cmd;
+
+   assert(numRegs > 0);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE,
+                            sizeof *cmd + numRegs * sizeof(float[4]),
+                            0); /* no relocations */
+   if (!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->regStart = regStart;
+   cmd->shaderType = shaderType;
+   cmd->constType = constType;
+
+   memcpy(&cmd[1], values, numRegs * sizeof(float[4]));

   swc->commit(swc);

--- a/src/gallium/drivers/svga/svga_cmd.h
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -46,6 +46,7 @@ struct svga_transfer;
 struct svga_winsys_context;
 struct svga_winsys_buffer;
 struct svga_winsys_surface;
+struct svga_winsys_gb_shader;


 /*
@@ -223,6 +224,91 @@ SVGA3D_SetShader(struct svga_winsys_context *swc,
                 SVGA3dShaderType type, uint32 shid);


+/*
+ * Guest-backed surface functions
+ */
+
+enum pipe_error
+SVGA3D_DefineGBShader(struct svga_winsys_context *swc,
+                      struct svga_winsys_gb_shader *gbshader,
+                      SVGA3dShaderType type,
+                      uint32 sizeInBytes);
+
+enum pipe_error
+SVGA3D_BindGBShader(struct svga_winsys_context *swc,
+                    struct svga_winsys_gb_shader *gbshader);
+
+enum pipe_error
+SVGA3D_SetGBShader(struct svga_winsys_context *swc,
+                   SVGA3dShaderType type,
+                   struct svga_winsys_gb_shader *gbshader);
+
+enum pipe_error
+SVGA3D_DestroyGBShader(struct svga_winsys_context *swc,
+                       struct svga_winsys_gb_shader *gbshader);
+
+enum pipe_error
+SVGA3D_BindGBSurface(struct svga_winsys_context *swc,
+                     struct svga_winsys_surface *surface);
+
+enum pipe_error
+SVGA3D_DefineGBContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_DestroyGBContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_BindGBContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_InvalidateGBContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_UpdateGBImage(struct svga_winsys_context *swc,
+                     struct svga_winsys_surface *surface,
+                     const SVGA3dBox *box,
+                     unsigned face, unsigned mipLevel);
+
+enum pipe_error
+SVGA3D_UpdateGBSurface(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *surface);
+
+
+enum pipe_error
+SVGA3D_ReadbackGBImage(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *surface,
+                       unsigned face, unsigned mipLevel);
+
+
+enum pipe_error
+SVGA3D_ReadbackGBSurface(struct svga_winsys_context *swc,
+                         struct svga_winsys_surface *surface);
+
+
+enum pipe_error
+SVGA3D_ReadbackGBImagePartial(struct svga_winsys_context *swc,
+                              struct svga_winsys_surface *surface,
+                              unsigned face, unsigned mipLevel,
+                              const SVGA3dBox *box,
+                              bool invertBox);
+
+
+enum pipe_error
+SVGA3D_InvalidateGBImagePartial(struct svga_winsys_context *swc,
+                                struct svga_winsys_surface *surface,
+                                unsigned face, unsigned mipLevel,
+                                const SVGA3dBox *box,
+                                bool invertBox);
+
+
+enum pipe_error
+SVGA3D_SetGBShaderConstsInline(struct svga_winsys_context *swc,
+                               unsigned regStart,
+                               unsigned numRegs,
+                               SVGA3dShaderType shaderType,
+                               SVGA3dShaderConstType constType,
+                               const void *values);
+
 /*
 * Queries
 */
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -53,6 +53,7 @@ DEBUG_GET_ONCE_BOOL_OPTION(force_hw_line_stipple, "SVGA_FORCE_HW_LINE_STIPPLE",
 static void svga_destroy( struct pipe_context *pipe )
 {
   struct svga_context *svga = svga_context( pipe );
+   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
   unsigned shader;

   util_blitter_destroy(svga->blitter);
@@ -68,11 +69,12 @@ static void svga_destroy( struct pipe_context *pipe )
   
   svga_destroy_swtnl( svga );

-   util_bitmask_destroy( svga->vs_bm );
-   util_bitmask_destroy( svga->fs_bm );
+   util_bitmask_destroy( svga->shader_id_bm );

-   for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
-      pipe_resource_reference( &svga->curr.cb[shader], NULL );
+   for (shader = 0; shader < PIPE_SHADER_TYPES; ++shader) {
+      pipe_resource_reference( &svga->curr.cbufs[shader].buffer, NULL );
+      sws->surface_reference(sws, &svga->state.hw_draw.hw_cb[shader], NULL);
+   }

   FREE( svga );
 }
@@ -90,6 +92,8 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
   if (svga == NULL)
      goto no_svga;

+   LIST_INITHEAD(&svga->dirty_buffers);
+
   svga->pipe.screen = screen;
   svga->pipe.priv = priv;
   svga->pipe.destroy = svga_destroy;
@@ -124,13 +128,9 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
   svga->debug.no_line_width = debug_get_option_no_line_width();
   svga->debug.force_hw_line_stipple = debug_get_option_force_hw_line_stipple();

-   svga->fs_bm = util_bitmask_create();
-   if (svga->fs_bm == NULL)
-      goto no_fs_bm;
-
-   svga->vs_bm = util_bitmask_create();
-   if (svga->vs_bm == NULL)
-      goto no_vs_bm;
+   svga->shader_id_bm = util_bitmask_create();
+   if (svga->shader_id_bm == NULL)
+      goto no_shader_bm;

   svga->hwtnl = svga_hwtnl_create(svga);
   if (svga->hwtnl == NULL)
@@ -152,11 +152,10 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen,
   memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
   memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
   svga->state.hw_draw.num_views = 0;
+   memset(&svga->state.hw_draw.hw_cb, 0x0, sizeof(svga->state.hw_draw.hw_cb));

   svga->dirty = ~0;

-   LIST_INITHEAD(&svga->dirty_buffers);
-
   return &svga->pipe;

 no_state:
@@ -164,10 +163,8 @@ no_state:
 no_swtnl:
   svga_hwtnl_destroy( svga->hwtnl );
 no_hwtnl:
-   util_bitmask_destroy( svga->vs_bm );
-no_vs_bm:
-   util_bitmask_destroy( svga->fs_bm );
-no_fs_bm:
+   util_bitmask_destroy( svga->shader_id_bm );
+no_shader_bm:
   svga->swc->destroy(svga->swc);
 no_swc:
   FREE(svga);
@@ -200,6 +197,10 @@ void svga_context_flush( struct svga_context *svga,
    */
   svga->rebind.rendertargets = TRUE;
   svga->rebind.texture_samplers = TRUE;
+   if (svga_have_gb_objects(svga)) {
+      svga->rebind.vs = TRUE;
+      svga->rebind.fs = TRUE;
+   }

   if (SVGA_DEBUG & DEBUG_SYNC) {
      if (fence)
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -36,8 +36,10 @@

 #include "tgsi/tgsi_scan.h"

+#include "svga_screen.h"
 #include "svga_state.h"
 #include "svga_tgsi.h"
+#include "svga_winsys.h"
 #include "svga_hw_reg.h"
 #include "svga3d_shaderdefs.h"

@@ -217,7 +219,7 @@ struct svga_state

   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
   struct pipe_index_buffer ib;
-   struct pipe_resource *cb[PIPE_SHADER_TYPES];
+   struct pipe_constant_buffer cbufs[PIPE_SHADER_TYPES];

   struct pipe_framebuffer_state framebuffer;
   float depthscale;
@@ -285,6 +287,11 @@ struct svga_hw_draw_state
   unsigned ts[SVGA3D_PIXEL_SAMPLERREG_MAX][SVGA3D_TS_MAX];
   float cb[PIPE_SHADER_TYPES][SVGA3D_CONSTREG_MAX][4];

+   /**
+    * For guest backed shader constants only.
+    */
+   struct svga_winsys_surface *hw_cb[PIPE_SHADER_TYPES];
+
   struct svga_shader_variant *fs;
   struct svga_shader_variant *vs;
   struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
@@ -347,8 +354,7 @@ struct svga_context
   } swtnl;

   /* Bitmask of used shader IDs */
-   struct util_bitmask *fs_bm;
-   struct util_bitmask *vs_bm;
+   struct util_bitmask *shader_id_bm;

   struct {
      unsigned dirty[SVGA_STATE_MAX];
@@ -368,6 +374,8 @@ struct svga_context
   struct {
      unsigned rendertargets:1;
      unsigned texture_samplers:1;
+      unsigned vs:1;
+      unsigned fs:1;
   } rebind;

   struct svga_hwtnl *hwtnl;
@@ -482,5 +490,17 @@ svga_context( struct pipe_context *pipe )
 }


+static INLINE boolean
+svga_have_gb_objects(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->have_gb_objects;
+}
+
+static INLINE boolean
+svga_have_gb_dma(const struct svga_context *svga)
+{
+   return svga_screen(svga->pipe.screen)->sws->have_gb_dma;
+}
+

 #endif
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -213,6 +213,20 @@ svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
         }
      }

+      if (svga->rebind.vs) {
+         ret = svga_reemit_vs_bindings(svga);
+         if (ret != PIPE_OK) {
+            return ret;
+         }
+      }
+
+      if (svga->rebind.fs) {
+         ret = svga_reemit_fs_bindings(svga);
+         if (ret != PIPE_OK) {
+            return ret;
+         }
+      }
+
      SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
               svga->curr.framebuffer.cbufs[0] ?
               svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
@@ -240,7 +254,7 @@ svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
            vdecl[i].rangeHint.last = 0;
         }

-         swc->surface_relocation(swc, &vdecl[i].array.surfaceId,
+         swc->surface_relocation(swc, &vdecl[i].array.surfaceId, NULL,
                                 vb_handle[i], SVGA_RELOC_READ);
      }

@@ -248,7 +262,7 @@ svga_hwtnl_flush(struct svga_hwtnl *hwtnl)
             hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);

      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
-         swc->surface_relocation(swc, &prim[i].indexArray.surfaceId,
+         swc->surface_relocation(swc, &prim[i].indexArray.surfaceId, NULL,
                                 ib_handle[i], SVGA_RELOC_READ);
         pipe_resource_reference(&hwtnl->cmd.prim_ib[i], NULL);
      }
--- a/src/gallium/drivers/svga/svga_format.c
+++ b/src/gallium/drivers/svga/svga_format.c
@@ -523,16 +523,6 @@ static const struct format_cap format_cap_table[] = {
      SVGA3D_DEVCAP_SURFACEFMT_AYUV,
      0
   },
-   {
-      SVGA3D_BC4_UNORM,
-      SVGA3D_DEVCAP_SURFACEFMT_BC4_UNORM,
-      0
-   },
-   {
-      SVGA3D_BC5_UNORM,
-      SVGA3D_DEVCAP_SURFACEFMT_BC5_UNORM,
-      0
-   },
   {
      SVGA3D_Z_DF16,
      SVGA3D_DEVCAP_SURFACEFMT_Z_DF16,
--- a/src/gallium/drivers/svga/svga_pipe_constants.c
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -61,8 +61,10 @@ static void svga_set_constant_buffer(struct pipe_context *pipe,
   assert(shader < PIPE_SHADER_TYPES);
   assert(index == 0);

-   pipe_resource_reference( &svga->curr.cb[shader],
-                          buf );
+   pipe_resource_reference(&svga->curr.cbufs[shader].buffer, buf);
+   svga->curr.cbufs[shader].buffer_size = cb ? cb->buffer_size : 0;
+   svga->curr.cbufs[shader].buffer_offset = cb ? cb->buffer_offset : 0;
+   svga->curr.cbufs[shader].user_buffer = NULL; /* not used */

   if (shader == PIPE_SHADER_FRAGMENT)
      svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -35,6 +35,7 @@
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
+#include "svga_shader.h"


 static void *
@@ -98,17 +99,8 @@ svga_delete_fs_state(struct pipe_context *pipe, void *shader)
   for (variant = fs->base.variants; variant; variant = tmp) {
      tmp = variant->next;

-      ret = SVGA3D_DestroyShader(svga->swc, variant->id, SVGA3D_SHADERTYPE_PS);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_DestroyShader(svga->swc, variant->id,
-                                    SVGA3D_SHADERTYPE_PS);
-         assert(ret == PIPE_OK);
-      }
-
-      util_bitmask_clear(svga->fs_bm, variant->id);
-
-      svga_destroy_shader_variant(variant);
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant);
+      (void) ret;  /* PIPE_ERROR_ not handled yet */

      /*
       * Remove stale references to this variant to ensure a new variant on the
--- a/src/gallium/drivers/svga/svga_pipe_misc.c
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -25,11 +25,13 @@

 #include "svga_cmd.h"

+#include "util/u_framebuffer.h"
 #include "util/u_inlines.h"

 #include "svga_context.h"
 #include "svga_screen.h"
 #include "svga_surface.h"
+#include "svga_resource_texture.h"


 static void svga_set_scissor_states( struct pipe_context *pipe,
@@ -86,19 +88,25 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
   dst->nr_cbufs = fb->nr_cbufs;

   /* check if we need to propagate any of the target surfaces */
-   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
-      if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
-         if (svga_surface_needs_propagation(dst->cbufs[i]))
+   for (i = 0; i < dst->nr_cbufs; i++) {
+      struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
+      if (dst->cbufs[i] && dst->cbufs[i] != s) {
+         if (svga_surface_needs_propagation(dst->cbufs[i])) {
            propagate = TRUE;
+            break;
+         }
+      }
   }

   if (propagate) {
      /* make sure that drawing calls comes before propagation calls */
      svga_hwtnl_flush_retry( svga );
   
-      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
-         if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+      for (i = 0; i < dst->nr_cbufs; i++) {
+         struct pipe_surface *s = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;
+         if (dst->cbufs[i] && dst->cbufs[i] != s)
            svga_propagate_surface(svga, dst->cbufs[i]);
+      }
   }

   /* XXX: Actually the virtual hardware may support rendertargets with
@@ -111,12 +119,16 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
      }
   }

-   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
-      pipe_surface_reference(&dst->cbufs[i],
-                             (i < fb->nr_cbufs) ? fb->cbufs[i] : NULL);
-   }
-   pipe_surface_reference(&dst->zsbuf, fb->zsbuf);
+   util_copy_framebuffer_state(dst, fb);

+   /* Set the rendered-to flags */
+   for (i = 0; i < dst->nr_cbufs; i++) {
+      struct pipe_surface *s = dst->cbufs[i];
+      if (s) {
+         struct svga_texture *t = svga_texture(s->texture);
+         svga_set_texture_rendered_to(t, s->u.tex.first_layer, s->u.tex.level);
+      }
+   }

   if (svga->curr.framebuffer.zsbuf)
   {
@@ -140,6 +152,13 @@ static void svga_set_framebuffer_state(struct pipe_context *pipe,
         svga->curr.depthscale = 0.0f;
         break;
      }
+
+      /* Set rendered-to flag */
+      {
+         struct pipe_surface *s = dst->zsbuf;
+         struct svga_texture *t = svga_texture(s->texture);
+         svga_set_texture_rendered_to(t, s->u.tex.first_layer, s->u.tex.level);
+      }
   }
   else {
      svga->curr.depthscale = 0.0f;
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -36,6 +36,7 @@
 #include "svga_hw_reg.h"
 #include "svga_cmd.h"
 #include "svga_debug.h"
+#include "svga_shader.h"


 /**
@@ -158,17 +159,8 @@ svga_delete_vs_state(struct pipe_context *pipe, void *shader)
   for (variant = vs->base.variants; variant; variant = tmp) {
      tmp = variant->next;

-      ret = SVGA3D_DestroyShader(svga->swc, variant->id, SVGA3D_SHADERTYPE_VS);
-      if (ret != PIPE_OK) {
-         svga_context_flush(svga, NULL);
-         ret = SVGA3D_DestroyShader(svga->swc, variant->id,
-                                    SVGA3D_SHADERTYPE_VS);
-         assert(ret == PIPE_OK);
-      }
-
-      util_bitmask_clear(svga->vs_bm, variant->id);
-
-      svga_destroy_shader_variant(variant);
+      ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
+      (void) ret;  /* PIPE_ERROR_ not handled yet */

      /*
       * Remove stale references to this variant to ensure a new variant on the
--- a/src/gallium/drivers/svga/svga_resource_buffer.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer.c
@@ -103,9 +103,13 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
            /*
             * Instead of flushing the context command buffer, simply discard
             * the current hwbuf, and start a new one.
+             * With GB objects, the map operation takes care of this
+             * if passed the PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE flag,
+             * and the old backing store is busy.
             */

-            svga_buffer_destroy_hw_storage(ss, sbuf);
+            if (!svga_have_gb_objects(svga))
+               svga_buffer_destroy_hw_storage(ss, sbuf);
         }

         sbuf->map.num_ranges = 0;
@@ -132,7 +136,7 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
         if (sbuf->dma.pending) {
            svga_buffer_upload_flush(svga, sbuf);

-            if (sbuf->hwbuf) {
+            if (svga_buffer_has_hw_storage(sbuf)) {
               /*
                * We have a pending DMA upload from a hardware buffer, therefore
                * we need to ensure that the host finishes processing that DMA
@@ -168,7 +172,7 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
      }
   }

-   if (!sbuf->swbuf && !sbuf->hwbuf) {
+   if (!sbuf->swbuf && !svga_buffer_has_hw_storage(sbuf)) {
      if (svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK) {
         /*
          * We can't create a hardware buffer big enough, so create a malloc
@@ -193,11 +197,19 @@ svga_buffer_transfer_map(struct pipe_context *pipe,
      /* User/malloc buffer */
      map = sbuf->swbuf;
   }
-   else if (sbuf->hwbuf) {
-      struct svga_screen *ss = svga_screen(pipe->screen);
-      struct svga_winsys_screen *sws = ss->sws;
+   else if (svga_buffer_has_hw_storage(sbuf)) {
+      boolean retry;

-      map = sws->buffer_map(sws, sbuf->hwbuf, transfer->usage);
+      map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
+      if (map == NULL && retry) {
+         /*
+          * At this point, svga_buffer_get_transfer() has already
+          * hit the DISCARD_WHOLE_RESOURCE path and flushed HWTNL
+          * for this buffer.
+          */
+         svga_context_flush(svga, NULL);
+         map = svga_buffer_hw_storage_map(svga, sbuf, transfer->usage, &retry);
+      }
   }
   else {
      map = NULL;
@@ -240,7 +252,7 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe,
                            struct pipe_transfer *transfer )
 {
   struct svga_screen *ss = svga_screen(pipe->screen);
-   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_context *svga = svga_context(pipe);
   struct svga_buffer *sbuf = svga_buffer(transfer->resource);
   
   pipe_mutex_lock(ss->swc_mutex);
@@ -250,8 +262,8 @@ svga_buffer_transfer_unmap( struct pipe_context *pipe,
      --sbuf->map.count;
   }

-   if (sbuf->hwbuf) {
-      sws->buffer_unmap(sws, sbuf->hwbuf);
+   if (svga_buffer_has_hw_storage(sbuf)) {
+      svga_buffer_hw_storage_unmap(svga, sbuf);
   }

   if (transfer->usage & PIPE_TRANSFER_WRITE) {
--- a/src/gallium/drivers/svga/svga_resource_buffer.h
+++ b/src/gallium/drivers/svga/svga_resource_buffer.h
@@ -34,6 +34,9 @@
 #include "util/u_double_list.h"

 #include "svga_screen_cache.h"
+#include "svga_screen.h"
+#include "svga_cmd.h"
+#include "svga_context.h"


 /**
@@ -55,6 +58,7 @@ struct svga_buffer_range
   unsigned end;
 };

+struct svga_3d_update_gb_image;

 /**
 * SVGA pipe buffer.
@@ -166,6 +170,12 @@ struct svga_buffer
       */
      SVGA3dCopyBox *boxes;

+      /**
+       * Pointer to the sequence of update commands
+       * *inside* the command buffer.
+       */
+      struct svga_3d_update_gb_image *updates;
+
      /**
       * Context that has the pending DMA to this buffer.
       */
@@ -207,8 +217,74 @@ svga_buffer_is_user_buffer( struct pipe_resource *buffer )
   }
 }

+/**
+ * Returns a pointer to a struct svga_winsys_screen given a
+ * struct svga_buffer.
+ */
+static INLINE struct svga_winsys_screen *
+svga_buffer_winsys_screen(struct svga_buffer *sbuf)
+{
+   return svga_screen(sbuf->b.b.screen)->sws;
+}


+/**
+ * Returns whether a buffer has hardware storage that is
+ * visible to the GPU.
+ */
+static INLINE boolean
+svga_buffer_has_hw_storage(struct svga_buffer *sbuf)
+{
+   if (svga_buffer_winsys_screen(sbuf)->have_gb_objects)
+      return (sbuf->handle ? TRUE : FALSE);
+   else
+      return (sbuf->hwbuf ? TRUE : FALSE);
+}
+
+/**
+ * Map the hardware storage of a buffer.
+ */
+static INLINE void *
+svga_buffer_hw_storage_map(struct svga_context *svga,
+                           struct svga_buffer *sbuf,
+                           unsigned flags, boolean *retry)
+{
+   struct svga_winsys_screen *sws = svga_buffer_winsys_screen(sbuf);
+   if (sws->have_gb_objects) {
+      return svga->swc->surface_map(svga->swc, sbuf->handle, flags, retry);
+   } else {
+      *retry = FALSE;
+      return sws->buffer_map(sws, sbuf->hwbuf, flags);
+   }
+}
+
+/**
+ * Unmap the hardware storage of a buffer.
+ */
+static INLINE void
+svga_buffer_hw_storage_unmap(struct svga_context *svga,
+                             struct svga_buffer *sbuf)
+{
+   struct svga_winsys_screen *sws = svga_buffer_winsys_screen(sbuf);
+
+   if (sws->have_gb_objects) {
+      struct svga_winsys_context *swc = svga->swc;
+      boolean rebind;
+      swc->surface_unmap(swc, sbuf->handle, &rebind);
+      if (rebind) {
+         enum pipe_error ret;
+         ret = SVGA3D_BindGBSurface(swc, sbuf->handle);
+         if (ret != PIPE_OK) {
+            /* flush and retry */
+            svga_context_flush(svga, NULL);
+            ret = SVGA3D_BindGBSurface(swc, sbuf->handle);
+            assert(ret == PIPE_OK);
+         }
+      }
+   } else
+      sws->buffer_unmap(sws, sbuf->hwbuf);
+}
+

 struct pipe_resource *
 svga_user_buffer_create(struct pipe_screen *screen,
--- a/src/gallium/drivers/svga/svga_resource_buffer_upload.c
+++ b/src/gallium/drivers/svga/svga_resource_buffer_upload.c
@@ -30,6 +30,7 @@
 #include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_double_list.h"

 #include "svga_cmd.h"
 #include "svga_context.h"
@@ -39,6 +40,20 @@
 #include "svga_screen.h"
 #include "svga_winsys.h"

+/**
+ * Describes a complete SVGA_3D_CMD_UPDATE_GB_IMAGE command
+ *
+ */
+struct svga_3d_update_gb_image {
+   SVGA3dCmdHeader header;
+   SVGA3dCmdUpdateGBImage body;
+};
+
+struct svga_3d_invalidate_gb_image {
+   SVGA3dCmdHeader header;
+   SVGA3dCmdInvalidateGBImage body;
+};
+

 /**
 * Allocate a winsys_buffer (ie. DMA, aka GMR memory).
@@ -72,6 +87,11 @@ svga_winsys_buffer_create( struct svga_context *svga,
 }


+/**
+ * Destroy HW storage if separate from the host surface.
+ * In the GB case, the HW storage is associated with the host surface
+ * and is therefore a No-op.
+ */
 void
 svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf)
 {
@@ -88,7 +108,7 @@ svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf)


 /**
- * Allocate DMA'ble storage for the buffer.
+ * Allocate DMA'ble or Updatable storage for the buffer.
 *
 * Called before mapping a buffer.
 */
@@ -98,6 +118,10 @@ svga_buffer_create_hw_storage(struct svga_screen *ss,
 {
   assert(!sbuf->user);

+   if (ss->sws->have_gb_objects) {
+      assert(sbuf->handle || !sbuf->dma.pending);
+      return svga_buffer_create_host_surface(ss, sbuf);
+   }
   if (!sbuf->hwbuf) {
      struct svga_winsys_screen *sws = ss->sws;
      unsigned alignment = 16;
@@ -169,6 +193,104 @@ svga_buffer_destroy_host_surface(struct svga_screen *ss,
 }


+/**
+ * Insert a number of preliminary UPDATE_GB_IMAGE commands in the
+ * command buffer, equal to the current number of mapped ranges.
+ * The UPDATE_GB_IMAGE commands will be patched with the
+ * actual ranges just before flush.
+ */
+static enum pipe_error
+svga_buffer_upload_gb_command(struct svga_context *svga,
+			      struct svga_buffer *sbuf)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   SVGA3dCmdUpdateGBImage *cmd;
+   struct svga_3d_update_gb_image *ccmd = NULL;
+   uint32 numBoxes = sbuf->map.num_ranges;
+   struct pipe_resource *dummy;
+   unsigned int i;
+
+   assert(numBoxes);
+   assert(sbuf->dma.updates == NULL);
+
+   if (sbuf->dma.flags.discard) {
+      struct svga_3d_invalidate_gb_image *cicmd = NULL;
+      SVGA3dCmdInvalidateGBImage *icmd;
+
+      /* Allocate FIFO space for one INVALIDATE_GB_IMAGE command followed by
+       * 'numBoxes' UPDATE_GB_IMAGE commands.  Allocate all at once rather
+       * than with separate commands because we need to properly deal with
+       * filling the command buffer.
+       */
+      icmd = SVGA3D_FIFOReserve(swc,
+				SVGA_3D_CMD_INVALIDATE_GB_IMAGE,
+				sizeof *icmd + numBoxes * sizeof *ccmd,
+				2);
+      if (!icmd)
+	 return PIPE_ERROR_OUT_OF_MEMORY;
+
+      cicmd = container_of(icmd, cicmd, body);
+      cicmd->header.size = sizeof *icmd;
+      swc->surface_relocation(swc, &icmd->image.sid, NULL, sbuf->handle,
+                              (SVGA_RELOC_WRITE |
+                               SVGA_RELOC_INTERNAL |
+                               SVGA_RELOC_DMA));
+      icmd->image.face = 0;
+      icmd->image.mipmap = 0;
+
+      /* initialize the first UPDATE_GB_IMAGE command */
+      ccmd = (struct svga_3d_update_gb_image *) &icmd[1];
+      ccmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+      cmd = &ccmd->body;
+
+   } else {
+      /* Allocate FIFO space for 'numBoxes' UPDATE_GB_IMAGE commands */
+      cmd = SVGA3D_FIFOReserve(swc,
+			       SVGA_3D_CMD_UPDATE_GB_IMAGE,
+			       sizeof *cmd + (numBoxes - 1) * sizeof *ccmd,
+			       1);
+      if (!cmd)
+	 return PIPE_ERROR_OUT_OF_MEMORY;
+
+      ccmd = container_of(cmd, ccmd, body);
+   }
+
+   /* Init the first UPDATE_GB_IMAGE command */
+   ccmd->header.size = sizeof *cmd;
+   swc->surface_relocation(swc, &cmd->image.sid, NULL, sbuf->handle,
+			   SVGA_RELOC_WRITE | SVGA_RELOC_INTERNAL);
+   cmd->image.face = 0;
+   cmd->image.mipmap = 0;
+
+   /* Save pointer to the first UPDATE_GB_IMAGE command so that we can
+    * fill in the box info below.
+    */
+   sbuf->dma.updates = ccmd;
+
+   /*
+    * Copy the relocation info, face and mipmap to all
+    * subsequent commands. NOTE: For winsyses that actually
+    * patch the image.sid member at flush time, this will fail
+    * miserably. For those we need to add as many relocations
+    * as there are copy boxes.
+    */
+
+   for (i = 1; i < numBoxes; ++i) {
+      memcpy(++ccmd, sbuf->dma.updates, sizeof *ccmd);
+   }
+
+   /* Increment reference count */
+   sbuf->dma.svga = svga;
+   dummy = NULL;
+   pipe_resource_reference(&dummy, &sbuf->b.b);
+   SVGA_FIFOCommitAll(swc);
+
+   sbuf->dma.flags.discard = FALSE;
+
+   return PIPE_OK;
+}
+
+
 /**
 * Variant of SVGA3D_BufferDMA which leaves the copy box temporarily in blank.
 */
@@ -188,6 +310,9 @@ svga_buffer_upload_command(struct svga_context *svga,
   unsigned surface_flags;
   struct pipe_resource *dummy;

+   if (svga_have_gb_objects(svga))
+      return svga_buffer_upload_gb_command(svga, sbuf);
+
   if (transfer == SVGA3D_WRITE_HOST_VRAM) {
      region_flags = SVGA_RELOC_READ;
      surface_flags = SVGA_RELOC_WRITE;
@@ -213,7 +338,7 @@ svga_buffer_upload_command(struct svga_context *svga,
   swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
   cmd->guest.pitch = 0;

-   swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+   swc->surface_relocation(swc, &cmd->host.sid, NULL, host, surface_flags);
   cmd->host.face = 0;
   cmd->host.mipmap = 0;

@@ -245,55 +370,86 @@ svga_buffer_upload_command(struct svga_context *svga,
 */
 void
 svga_buffer_upload_flush(struct svga_context *svga,
-                         struct svga_buffer *sbuf)
+			 struct svga_buffer *sbuf)
 {
-   SVGA3dCopyBox *boxes;
   unsigned i;
   struct pipe_resource *dummy;

   if (!sbuf->dma.pending) {
+      //debug_printf("no dma pending on buffer\n");
      return;
   }

   assert(sbuf->handle);
-   assert(sbuf->hwbuf);
   assert(sbuf->map.num_ranges);
   assert(sbuf->dma.svga == svga);
-   assert(sbuf->dma.boxes);

   /*
-    * Patch the DMA command with the final copy box.
+    * Patch the DMA/update command with the final copy box.
    */
+   if (svga_have_gb_objects(svga)) {
+      struct svga_3d_update_gb_image *update = sbuf->dma.updates;
+      assert(update);

-   SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);
+      for (i = 0; i < sbuf->map.num_ranges; ++i, ++update) {
+         SVGA3dBox *box = &update->body.box;

-   boxes = sbuf->dma.boxes;
-   for (i = 0; i < sbuf->map.num_ranges; ++i) {
-      SVGA_DBG(DEBUG_DMA, "  bytes %u - %u\n",
+         SVGA_DBG(DEBUG_DMA, "  bytes %u - %u\n",
+                  sbuf->map.ranges[i].start, sbuf->map.ranges[i].end);
+
+         box->x = sbuf->map.ranges[i].start;
+         box->y = 0;
+         box->z = 0;
+         box->w = sbuf->map.ranges[i].end - sbuf->map.ranges[i].start;
+         box->h = 1;
+         box->d = 1;
+
+         assert(box->x <= sbuf->b.b.width0);
+         assert(box->x + box->w <= sbuf->b.b.width0);
+      }
+   }
+   else {
+      assert(sbuf->hwbuf);
+      assert(sbuf->dma.boxes);
+      SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);
+
+      for (i = 0; i < sbuf->map.num_ranges; ++i) {
+         SVGA3dCopyBox *box = sbuf->dma.boxes + i;
+
+         SVGA_DBG(DEBUG_DMA, "  bytes %u - %u\n",
               sbuf->map.ranges[i].start, sbuf->map.ranges[i].end);

-      boxes[i].x = sbuf->map.ranges[i].start;
-      boxes[i].y = 0;
-      boxes[i].z = 0;
-      boxes[i].w = sbuf->map.ranges[i].end - sbuf->map.ranges[i].start;
-      boxes[i].h = 1;
-      boxes[i].d = 1;
-      boxes[i].srcx = sbuf->map.ranges[i].start;
-      boxes[i].srcy = 0;
-      boxes[i].srcz = 0;
+         box->x = sbuf->map.ranges[i].start;
+         box->y = 0;
+         box->z = 0;
+         box->w = sbuf->map.ranges[i].end - sbuf->map.ranges[i].start;
+         box->h = 1;
+         box->d = 1;
+         box->srcx = sbuf->map.ranges[i].start;
+         box->srcy = 0;
+         box->srcz = 0;
+
+         assert(box->x <= sbuf->b.b.width0);
+         assert(box->x + box->w <= sbuf->b.b.width0);
+      }
   }

+   /* Reset sbuf for next use/upload */
+
   sbuf->map.num_ranges = 0;

   assert(sbuf->head.prev && sbuf->head.next);
-   LIST_DEL(&sbuf->head);
-
+   LIST_DEL(&sbuf->head);  /* remove from svga->dirty_buffers list */
+#ifdef DEBUG
+   sbuf->head.next = sbuf->head.prev = NULL;
+#endif
   sbuf->dma.pending = FALSE;
   sbuf->dma.flags.discard = FALSE;
   sbuf->dma.flags.unsynchronized = FALSE;

   sbuf->dma.svga = NULL;
   sbuf->dma.boxes = NULL;
+   sbuf->dma.updates = NULL;

   /* Decrement reference count (and potentially destroy) */
   dummy = &sbuf->b.b;
@@ -409,24 +565,28 @@ svga_buffer_add_range(struct svga_buffer *sbuf,
 * Copy the contents of the malloc buffer to a hardware buffer.
 */
 static enum pipe_error
-svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf)
+svga_buffer_update_hw(struct svga_context *svga, struct svga_buffer *sbuf)
 {
   assert(!sbuf->user);
-   if (!sbuf->hwbuf) {
+   if (!svga_buffer_has_hw_storage(sbuf)) {
+      struct svga_screen *ss = svga_screen(sbuf->b.b.screen);
      enum pipe_error ret;
+      boolean retry;
      void *map;

      assert(sbuf->swbuf);
      if (!sbuf->swbuf)
         return PIPE_ERROR;

-      ret = svga_buffer_create_hw_storage(ss, sbuf);
+      ret = svga_buffer_create_hw_storage(svga_screen(sbuf->b.b.screen),
+					  sbuf);
      if (ret != PIPE_OK)
         return ret;

      pipe_mutex_lock(ss->swc_mutex);
-      map = ss->sws->buffer_map(ss->sws, sbuf->hwbuf, PIPE_TRANSFER_WRITE);
+      map = svga_buffer_hw_storage_map(svga, sbuf, PIPE_TRANSFER_WRITE, &retry);
      assert(map);
+      assert(!retry);
      if (!map) {
 	 pipe_mutex_unlock(ss->swc_mutex);
         svga_buffer_destroy_hw_storage(ss, sbuf);
@@ -434,7 +594,7 @@ svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf)
      }

      memcpy(map, sbuf->swbuf, sbuf->b.b.width0);
-      ss->sws->buffer_unmap(ss->sws, sbuf->hwbuf);
+      svga_buffer_hw_storage_unmap(svga, sbuf);

      /* This user/malloc buffer is now indistinguishable from a gpu buffer */
      assert(!sbuf->map.count);
@@ -457,6 +617,9 @@ svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf)
 * Upload the buffer to the host in a piecewise fashion.
 *
 * Used when the buffer is too big to fit in the GMR aperture.
+ * This function should never get called in the guest-backed case
+ * since we always have a full-sized hardware storage backing the
+ * host surface.
 */
 static enum pipe_error
 svga_buffer_upload_piecewise(struct svga_screen *ss,
@@ -470,6 +633,7 @@ svga_buffer_upload_piecewise(struct svga_screen *ss,

   assert(sbuf->map.num_ranges);
   assert(!sbuf->dma.pending);
+   assert(!svga_have_gb_objects(svga));

   SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);

@@ -533,10 +697,12 @@ svga_buffer_upload_piecewise(struct svga_screen *ss,
 }


-
-
-/* Get (or create/upload) the winsys surface handle so that we can
+/**
+ * Get (or create/upload) the winsys surface handle so that we can
 * refer to this buffer in fifo commands.
+ * This function will create the host surface, and in the GB case also the
+ * hardware storage. In the non-GB case, the hardware storage will be created
+ * if there are mapped ranges and the data is currently in a malloc'ed buffer.
 */
 struct svga_winsys_surface *
 svga_buffer_handle(struct svga_context *svga,
@@ -552,11 +718,15 @@ svga_buffer_handle(struct svga_context *svga,

   sbuf = svga_buffer(buf);

-   assert(!sbuf->map.count);
   assert(!sbuf->user);

   if (!sbuf->handle) {
-      ret = svga_buffer_create_host_surface(ss, sbuf);
+      /* This call will set sbuf->handle */
+      if (svga_have_gb_objects(svga)) {
+	 ret = svga_buffer_update_hw(svga, sbuf);
+      } else {
+	 ret = svga_buffer_create_host_surface(ss, sbuf);
+      }
      if (ret != PIPE_OK)
 	 return NULL;
   }
@@ -572,7 +742,7 @@ svga_buffer_handle(struct svga_context *svga,
         /*
          * Migrate the data from swbuf -> hwbuf if necessary.
          */
-         ret = svga_buffer_update_hw(ss, sbuf);
+         ret = svga_buffer_update_hw(svga, sbuf);
         if (ret == PIPE_OK) {
            /*
             * Queue a dma command.
--- a/src/gallium/drivers/svga/svga_resource_texture.c
+++ b/src/gallium/drivers/svga/svga_resource_texture.c
@@ -23,17 +23,19 @@
 *
 **********************************************************/

-#include "svga_cmd.h"
+#include "svga3d_reg.h"
+#include "svga3d_surfacedefs.h"

 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
-#include "util/u_inlines.h"
 #include "os/os_thread.h"
 #include "util/u_format.h"
+#include "util/u_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_resource.h"

+#include "svga_cmd.h"
 #include "svga_format.h"
 #include "svga_screen.h"
 #include "svga_context.h"
@@ -61,6 +63,8 @@ svga_transfer_dma_band(struct svga_context *svga,
   SVGA3dCopyBox box;
   enum pipe_error ret;
 
+   assert(!st->use_direct_map);
+
   box.x = st->base.box.x;
   box.y = y;
   box.z = st->base.box.z;
@@ -111,6 +115,8 @@ svga_transfer_dma(struct svga_context *svga,
   struct svga_winsys_screen *sws = screen->sws;
   struct pipe_fence_handle *fence = NULL;

+   assert(!st->use_direct_map);
+
   if (transfer == SVGA3D_READ_HOST_VRAM) {
      SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
   }
@@ -232,10 +238,43 @@ svga_texture_destroy(struct pipe_screen *screen,

   ss->total_resource_bytes -= tex->size;

+   FREE(tex->rendered_to);
   FREE(tex);
 }


+/**
+ * Determine if we need to read back a texture image before mapping it.
+ */
+static boolean
+need_tex_readback(struct pipe_transfer *transfer)
+{
+   struct svga_texture *t = svga_texture(transfer->resource);
+
+   if (transfer->usage & PIPE_TRANSFER_READ)
+      return TRUE;
+
+   if ((transfer->usage & PIPE_TRANSFER_WRITE) &&
+       ((transfer->usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) == 0)) {
+      unsigned face;
+
+      if (transfer->resource->target == PIPE_TEXTURE_CUBE) {
+         assert(transfer->box.depth == 1);
+         face = transfer->box.z;
+      }
+      else {
+         face = 0;
+      }
+      if (svga_was_texture_rendered_to(t, face, transfer->level)) {
+         return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
+
+
 /* XXX: Still implementing this as if it was a screen function, but
 * can now modify it to queue transfers on the context.
 */
@@ -251,84 +290,254 @@ svga_texture_transfer_map(struct pipe_context *pipe,
   struct svga_screen *ss = svga_screen(pipe->screen);
   struct svga_winsys_screen *sws = ss->sws;
   struct svga_transfer *st;
-   unsigned nblocksx = util_format_get_nblocksx(texture->format, box->width);
-   unsigned nblocksy = util_format_get_nblocksy(texture->format, box->height);
+   unsigned nblocksx, nblocksy;
+   boolean use_direct_map = svga_have_gb_objects(svga) &&
+      !svga_have_gb_dma(svga);
+   unsigned d;

-   /* We can't map texture storage directly */
-   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
-      return NULL;
+   /* We can't map texture storage directly unless we have GB objects */
+   if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
+      if (svga_have_gb_objects(svga))
+         use_direct_map = TRUE;
+      else
+         return NULL;
+   }

   st = CALLOC_STRUCT(svga_transfer);
   if (!st)
      return NULL;

-   st->base.resource = texture;
+   {
+      unsigned w, h;
+      if (use_direct_map) {
+         /* we'll directly access the guest-backed surface */
+         w = u_minify(texture->width0, level);
+         h = u_minify(texture->height0, level);
+         d = u_minify(texture->depth0, level);
+      }
+      else {
+         /* we'll put the data into a tightly packed buffer */
+         w = box->width;
+         h = box->height;
+         d = box->depth;
+      }
+      nblocksx = util_format_get_nblocksx(texture->format, w);
+      nblocksy = util_format_get_nblocksy(texture->format, h);
+   }
+
+   pipe_resource_reference(&st->base.resource, texture);
   st->base.level = level;
   st->base.usage = usage;
   st->base.box = *box;
   st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
   st->base.layer_stride = st->base.stride * nblocksy;

-   st->hw_nblocksy = nblocksy;
+   if (!use_direct_map) {
+      /* Use a DMA buffer */
+      st->hw_nblocksy = nblocksy;

-   st->hwbuf = svga_winsys_buffer_create(svga,
-                                         1, 
-                                         0,
-                                         st->hw_nblocksy * st->base.stride * box->depth);
-   while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
      st->hwbuf = svga_winsys_buffer_create(svga,
                                            1, 
                                            0,
-                                            st->hw_nblocksy * st->base.stride * box->depth);
-   }
-
-   if(!st->hwbuf)
-      goto no_hwbuf;
-
-   if(st->hw_nblocksy < nblocksy) {
-      /* We couldn't allocate a hardware buffer big enough for the transfer, 
-       * so allocate regular malloc memory instead */
-      if (0) {
-         debug_printf("%s: failed to allocate %u KB of DMA, "
-                      "splitting into %u x %u KB DMA transfers\n",
-                      __FUNCTION__,
-                      (nblocksy*st->base.stride + 1023)/1024,
-                      (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
-                      (st->hw_nblocksy*st->base.stride + 1023)/1024);
+                                            st->hw_nblocksy * st->base.stride * d);
+      while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
+         st->hwbuf = svga_winsys_buffer_create(svga,
+                                               1, 
+                                               0,
+                                               st->hw_nblocksy * st->base.stride * d);
      }

-      st->swbuf = MALLOC(nblocksy*st->base.stride);
-      if(!st->swbuf)
-         goto no_swbuf;
-   }
+      if (!st->hwbuf) {
+         FREE(st);
+         return NULL;
+      }

-   if (usage & PIPE_TRANSFER_READ) {
-      SVGA3dSurfaceDMAFlags flags;
-      memset(&flags, 0, sizeof flags);
-      svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags);
-   }
+      if(st->hw_nblocksy < nblocksy) {
+         /* We couldn't allocate a hardware buffer big enough for the transfer, 
+          * so allocate regular malloc memory instead */
+         if (0) {
+            debug_printf("%s: failed to allocate %u KB of DMA, "
+                         "splitting into %u x %u KB DMA transfers\n",
+                         __FUNCTION__,
+                         (nblocksy*st->base.stride + 1023)/1024,
+                         (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+                         (st->hw_nblocksy*st->base.stride + 1023)/1024);
+         }

-   if (st->swbuf) {
-      *ptransfer = &st->base;
-      return st->swbuf;
+         st->swbuf = MALLOC(nblocksy * st->base.stride * d);
+         if (!st->swbuf) {
+            sws->buffer_destroy(sws, st->hwbuf);
+            FREE(st);
+            return NULL;
+         }
+      }
+
+      if (usage & PIPE_TRANSFER_READ) {
+         SVGA3dSurfaceDMAFlags flags;
+         memset(&flags, 0, sizeof flags);
+         svga_transfer_dma(svga, st, SVGA3D_READ_HOST_VRAM, flags);
+      }
   } else {
-      /* The wait for read transfers already happened when svga_transfer_dma
-       * was called. */
-      void *map = sws->buffer_map(sws, st->hwbuf, usage);
-      if (!map)
-         goto fail;
+      struct pipe_transfer *transfer = &st->base;
+      struct svga_texture *tex = svga_texture(transfer->resource);
+      struct svga_winsys_surface *surf = tex->handle;
+      unsigned face;

-      *ptransfer = &st->base;
-      return map;
+      assert(surf);
+
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+	 face = transfer->box.z;
+      } else {
+	 face = 0;
+      }
+
+      if (need_tex_readback(transfer)) {
+	 SVGA3dBox box;
+	 enum pipe_error ret;
+
+	 box.x = transfer->box.x;
+	 box.y = transfer->box.y;
+	 box.w = transfer->box.width;
+	 box.h = transfer->box.height;
+	 box.d = transfer->box.depth;
+	 if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+	    box.z = 0;
+	 }
+	 else {
+	    box.z = transfer->box.z;
+	 }
+
+         (void) box;  /* not used at this time */
+
+         svga_surfaces_flush(svga);
+
+	 ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+
+	 if (ret != PIPE_OK) {
+	    svga_context_flush(svga, NULL);
+	    ret = SVGA3D_ReadbackGBImage(svga->swc, surf, face, transfer->level);
+	    assert(ret == PIPE_OK);
+	 }
+
+	 svga_context_flush(svga, NULL);
+
+         /*
+          * Note: if PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE were specified
+          * we could potentially clear the flag for all faces/layers/mips.
+          */
+         svga_clear_texture_rendered_to(tex, face, transfer->level);
+      }
+      else {
+	 assert(transfer->usage & PIPE_TRANSFER_WRITE);
+	 if ((transfer->usage & PIPE_TRANSFER_UNSYNCHRONIZED) == 0) {
+            svga_surfaces_flush(svga);
+            if (!sws->surface_is_flushed(sws, surf))
+               svga_context_flush(svga, NULL);
+	 }
+      }
   }

-fail:
-   FREE(st->swbuf);
-no_swbuf:
-   sws->buffer_destroy(sws, st->hwbuf);
-no_hwbuf:
-   FREE(st);
-   return NULL;
+   st->use_direct_map = use_direct_map;
+
+   *ptransfer = &st->base;
+
+   /*
+    * Begin mapping code
+    */
+   if (st->swbuf) {
+      return st->swbuf;
+   }
+   else if (!st->use_direct_map) {
+      return sws->buffer_map(sws, st->hwbuf, usage);
+   }
+   else {
+      struct svga_screen *screen = svga_screen(svga->pipe.screen);
+      SVGA3dSurfaceFormat format;
+      SVGA3dSize baseLevelSize;
+      struct svga_texture *tex = svga_texture(texture);
+      struct svga_winsys_surface *surf = tex->handle;
+      uint8_t *map;
+      boolean retry;
+      unsigned face, offset, mip_width, mip_height;
+      unsigned xoffset = box->x;
+      unsigned yoffset = box->y;
+      unsigned zoffset = box->z;
+
+      map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
+      if (map == NULL && retry) {
+         /*
+          * At this point, the svga_surfaces_flush() should already have
+          * called in svga_texture_get_transfer().
+          */
+         svga_context_flush(svga, NULL);
+         map = svga->swc->surface_map(svga->swc, surf, usage, &retry);
+      }
+
+      /*
+       * Make sure whe return NULL if the map fails
+       */
+      if (map == NULL) {
+         FREE(st);
+         return map;
+      }
+
+      /**
+       * Compute the offset to the specific texture slice in the buffer.
+       */
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+         face = zoffset;
+         zoffset = 0;
+      } else {
+         face = 0;
+      }
+
+      format = svga_translate_format(screen, tex->b.b.format, 0);
+      baseLevelSize.width = tex->b.b.width0;
+      baseLevelSize.height = tex->b.b.height0;
+      baseLevelSize.depth = tex->b.b.depth0;
+
+      offset = svga3dsurface_get_image_offset(format, baseLevelSize,
+                                              tex->b.b.last_level + 1, /* numMips */
+                                              face, level);
+      if (level > 0) {
+         assert(offset > 0);
+      }
+
+      mip_width = u_minify(tex->b.b.width0, level);
+      mip_height = u_minify(tex->b.b.height0, level);
+
+      offset += svga3dsurface_get_pixel_offset(format, mip_width, mip_height,
+                                               xoffset, yoffset, zoffset);
+
+      return (void *) (map + offset);
+   }
+}
+
+
+/**
+ * Unmap a GB texture surface.
+ */
+static void
+svga_texture_surface_unmap(struct svga_context *svga,
+                           struct pipe_transfer *transfer)
+{
+   struct svga_winsys_surface *surf = svga_texture(transfer->resource)->handle;
+   struct svga_winsys_context *swc = svga->swc;
+   boolean rebind;
+
+   assert(surf);
+
+   swc->surface_unmap(swc, surf, &rebind);
+   if (rebind) {
+      enum pipe_error ret;
+      ret = SVGA3D_BindGBSurface(swc, surf);
+      if (ret != PIPE_OK) {
+         /* flush and retry */
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_BindGBSurface(swc, surf);
+         assert(ret == PIPE_OK);
+      }
+   }
 }


@@ -345,10 +554,17 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
   struct svga_transfer *st = svga_transfer(transfer);
   struct svga_texture *tex = svga_texture(transfer->resource);

-   if(!st->swbuf)
-      sws->buffer_unmap(sws, st->hwbuf);
+   if (!st->swbuf) {
+      if (st->use_direct_map) {
+         svga_texture_surface_unmap(svga, transfer);
+      }
+      else {
+         sws->buffer_unmap(sws, st->hwbuf);
+      }
+   }

-   if (st->base.usage & PIPE_TRANSFER_WRITE) {
+   if (!st->use_direct_map && (st->base.usage & PIPE_TRANSFER_WRITE)) {
+      /* Use DMA to transfer texture data */
      SVGA3dSurfaceDMAFlags flags;

      memset(&flags, 0, sizeof flags);
@@ -360,16 +576,61 @@ svga_texture_transfer_unmap(struct pipe_context *pipe,
      }

      svga_transfer_dma(svga, st, SVGA3D_WRITE_HOST_VRAM, flags);
-      ss->texture_timestamp++;
-      svga_age_texture_view(tex, transfer->level);
-      if (transfer->resource->target == PIPE_TEXTURE_CUBE)
-         svga_define_texture_level(tex, transfer->box.z, transfer->level);
-      else
-         svga_define_texture_level(tex, 0, transfer->level);
+   } else if (transfer->usage & PIPE_TRANSFER_WRITE) {
+      struct svga_winsys_surface *surf =
+	 svga_texture(transfer->resource)->handle;
+      unsigned face;
+      SVGA3dBox box;
+      enum pipe_error ret;
+
+      assert(svga_have_gb_objects(svga));
+
+      /* update the effected region */
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+	 face = transfer->box.z;
+      } else {
+	 face = 0;
+      }
+
+      box.x = transfer->box.x;
+      box.y = transfer->box.y;
+      if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+         box.z = 0;
+      }
+      else {
+         box.z = transfer->box.z;
+      }
+      box.w = transfer->box.width;
+      box.h = transfer->box.height;
+      box.d = transfer->box.depth;
+
+      if (0)
+         debug_printf("%s %d, %d, %d  %d x %d x %d\n",
+                      __FUNCTION__,
+                      box.x, box.y, box.z,
+                      box.w, box.h, box.d);
+
+      ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_UpdateGBImage(svga->swc, surf, &box, face, transfer->level);
+         assert(ret == PIPE_OK);
+      }
   }

+   ss->texture_timestamp++;
+   svga_age_texture_view(tex, transfer->level);
+   if (transfer->resource->target == PIPE_TEXTURE_CUBE)
+      svga_define_texture_level(tex, transfer->box.z, transfer->level);
+   else
+      svga_define_texture_level(tex, 0, transfer->level);
+
+   pipe_resource_reference(&st->base.resource, NULL);
+
   FREE(st->swbuf);
-   sws->buffer_destroy(sws, st->hwbuf);
+   if (!st->use_direct_map) {
+      sws->buffer_destroy(sws, st->hwbuf);
+   }
   FREE(st);
 }

@@ -418,7 +679,7 @@ svga_texture_create(struct pipe_screen *screen,
   }

   if (template->target == PIPE_TEXTURE_3D) {
-      tex->key.flags |= SVGA3D_SURFACE_HINT_VOLUME;
+      tex->key.flags |= SVGA3D_SURFACE_VOLUME;
   }

   tex->key.cachable = 1;
@@ -475,9 +736,15 @@ svga_texture_create(struct pipe_screen *screen,
   tex->size = util_resource_size(template);
   svgascreen->total_resource_bytes += tex->size;

+   tex->rendered_to = CALLOC(template->depth0 * template->array_size,
+                             sizeof(tex->rendered_to[0]));
+   if (!tex->rendered_to)
+      goto error2;
+
   return &tex->b.b;

 error2:
+   FREE(tex->rendered_to);
   FREE(tex);
 error1:
   return NULL;
@@ -536,5 +803,7 @@ svga_texture_from_handle(struct pipe_screen *screen,
   tex->key.cachable = 0;
   tex->handle = srf;

+   tex->rendered_to = CALLOC(1, sizeof(tex->rendered_to[0]));
+
   return &tex->b.b;
 }
--- a/src/gallium/drivers/svga/svga_resource_texture.h
+++ b/src/gallium/drivers/svga/svga_resource_texture.h
@@ -78,6 +78,9 @@ struct svga_texture
   struct svga_winsys_surface *handle;

   unsigned size;  /**< Approximate size in bytes */
+
+   /** array indexed by cube face or 3D/array slice, one bit per mipmap level */
+   ushort *rendered_to;
 };


@@ -98,6 +101,8 @@ struct svga_transfer
   /* Temporary malloc buffer when we can't allocate a hardware buffer
    * big enough */
   void *swbuf;
+
+   boolean use_direct_map;
 };


@@ -143,6 +148,62 @@ svga_define_texture_level(struct svga_texture *tex,
 }


+static INLINE bool
+svga_is_texture_level_defined(const struct svga_texture *tex,
+                              unsigned face, unsigned level)
+{
+   assert(face < Elements(tex->defined));
+   assert(level < Elements(tex->defined[0]));
+   return tex->defined[face][level];
+}
+
+
+/** For debugging, check that face and level are legal */
+static inline void
+check_face_level(const struct svga_texture *tex,
+                 unsigned face, unsigned level)
+{
+   if (tex->b.b.target == PIPE_TEXTURE_CUBE) {
+      assert(face < 6);
+   }
+   else if (tex->b.b.target == PIPE_TEXTURE_3D) {
+      assert(face < tex->b.b.depth0);
+   }
+   else {
+      assert(face < tex->b.b.array_size);
+   }
+
+   assert(level < 8 * sizeof(tex->rendered_to[0]));
+}
+
+
+static INLINE void
+svga_set_texture_rendered_to(struct svga_texture *tex,
+                             unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);
+   tex->rendered_to[face] |= 1 << level;
+}
+
+
+static INLINE void
+svga_clear_texture_rendered_to(struct svga_texture *tex,
+                               unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);
+   tex->rendered_to[face] &= ~(1 << level);
+}
+
+
+static INLINE boolean
+svga_was_texture_rendered_to(const struct svga_texture *tex,
+                             unsigned face, unsigned level)
+{
+   check_face_level(tex, face, level);
+   return !!(tex->rendered_to[face] & (1 << level));
+}
+
+
 struct pipe_resource *
 svga_texture_create(struct pipe_screen *screen,
                    const struct pipe_resource *template);
--- a/src/gallium/drivers/svga/svga_sampler_view.c
+++ b/src/gallium/drivers/svga/svga_sampler_view.c
@@ -150,7 +150,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe,
                                          &sv->key);

   if (!sv->handle) {
-      assert(0);
      sv->key.cachable = 0;
      sv->handle = tex->handle;
      debug_reference(&sv->reference,
--- a/Show More
+++ b/Show More