docs: 9.1.4 release notes

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
mesa: Bump version to 9.1.4
2013-07-01 14:05:00 -07:00 · 2013-07-01 13:58:56 -07:00 · 2013-07-01 08:49:08 +02:00 · 2013-06-30 21:41:57 -07:00 · 2013-06-29 15:21:29 -07:00 · 2013-06-27 13:17:20 -07:00
154 changed files with 3097 additions and 887 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,7 +36,7 @@ check-local:

 # Rules for making release tarballs

-PACKAGE_VERSION=9.1.2
+PACKAGE_VERSION=9.1.4
 PACKAGE_DIR = Mesa-$(PACKAGE_VERSION)
 PACKAGE_NAME = MesaLib-$(PACKAGE_VERSION)

--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -8,3 +8,25 @@ d60da27273d2cdb68bc32cae2ca66718dab15f27 st/mesa: set ctx->Const.MaxSamples = 0,

 # This patch is superceded by 7d4f1e6
 dbf94d105a48b7aafb2c8cf64d8b4392d87efea1 glsl: Replace constant-index vector array accesses with swizzles
+
+# This patch is superceded by 34a4fc5
+0967c362bf378b7415c30ca6d9523d3b2a3a7f5d i965: Fix an inconsistency inb the VUE map with gl_ClipVertex on gen4/5.
+
+# This patch was backported as c3eb301
+a8246927e35a49097f70cffb7fa8dd05ec1365e1 r600g: Fix UMAD on Cayman
+
+# These patches cannot be backported without other, too invasive changes
+eb19163a4dd3d7bfeed63229820c926f99ed00d9 radeonsi: Initial support for multiple constant buffers
+e3befbca5ed9f22effcdc91c5886c86b644bc190 radeonsi: Handle TGSI_SEMANTIC_CLIPVERTEX
+
+# These patches are performance improvements that are difficult to backport and cause regressions
+740350c982bd2735b9eb9063c2b91856b6f1ad31 i965: Make the fragment shader pull constants index by dwords, not vec4s.
+dca5fc14358a8b267b3854c39c976a822885898f i965/fs: Improve performance of varying-index uniform loads on IVB.
+70b27e0e4b5d15e575ea477d63c0f6cb19d645c2 i965/fs: Use LD messages for pre-gen7 varying-index uniform loads
+62501c3af85089b423218a41a2e2433ac849c2d3 i965/fs: Allow CSE on pre-gen7 varying-index uniform loads
+
+# Reverted in master
+98dfd59a0445666060c97b0dccaf0e9f030b547a i965: fix problem with constant out of bounds access (v2)
+
+# Already cherry-picked, but squashed with the commit that broke what this fixed
+4405ff4055685841c9d9545da52c7edc8708b14b i965: Fix haswell_upload_cut_index when there's no index buffer.
--- a/bin/bugzilla_mesa.sh
+++ b/bin/bugzilla_mesa.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# This script is used to generate the list of fixed bugs that
+# appears in the release notes files, with HTML formatting.
+#
+# Note: This script could take a while until all details have
+#       been fetched from bugzilla.
+#
+# Usage examples:
+#
+# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3
+# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 > bugfixes
+# $ bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | tee bugfixes
+# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3
+# $ DRYRUN=yes bin/bugzilla_mesa.sh mesa-9.0.2..mesa-9.0.3 | wc -l
+
+
+# regex pattern: trim before url
+trim_before='s/.*\(http\)/\1/'
+
+# regex pattern: trim after url
+trim_after='s/\(show_bug.cgi?id=[0-9]*\).*/\1/'
+
+# regex pattern: always use https
+use_https='s/http:/https:/'
+
+# extract fdo urls from commit log
+urls=$(git log $* | grep 'bugs.freedesktop.org/show_bug' | sed -e $trim_before -e $trim_after -e $use_https | sort | uniq)
+
+# if DRYRUN is set to "yes", simply print the URLs and don't fetch the
+# details from fdo bugzilla.
+#DRYRUN=yes
+
+if [ "x$DRYRUN" = xyes ]; then
+	for i in $urls
+	do
+		echo $i
+	done
+else
+	echo "<ul>"
+	echo ""
+
+	for i in $urls
+	do
+		id=$(echo $i | cut -d'=' -f2)
+		summary=$(wget --quiet -O - $i | grep -e '<title>.*</title>' | sed -e 's/ *<title>Bug [0-9]\+ &ndash; \(.*\)<\/title>/\1/')
+		echo "<li><a href=\"$i\">Bug $id</a> - $summary</li>"
+		echo ""
+	done
+
+	echo "</ul>"
+fi
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -1,6 +1,12 @@
 #!/bin/sh

 # Script for generating a list of candidates for cherry-picking to a stable branch
+#
+# Usage examples:
+#
+# $ bin/get-pick-list.sh
+# $ bin/get-pick-list.sh > picklist
+# $ bin/get-pick-list.sh | tee picklist

 # Grep for commits with "cherry picked from commit" in the commit message.
 git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
--- a/bin/shortlog_mesa.sh
+++ b/bin/shortlog_mesa.sh
@@ -2,6 +2,12 @@

 # This script is used to generate the list of changes that
 # appears in the release notes files, with HTML formatting.
+#
+# Usage examples:
+#
+# $ bin/shortlog_mesa.sh mesa-9.0.2..mesa-9.0.3
+# $ bin/shortlog_mesa.sh mesa-9.0.2..mesa-9.0.3 > changes
+# $ bin/shortlog_mesa.sh mesa-9.0.2..mesa-9.0.3 | tee changes


 typeset -i in_log=0
--- a/configure.ac
+++ b/configure.ac
@@ -6,7 +6,7 @@ dnl Tell the user about autoconf.html in the --help output
 m4_divert_once([HELP_END], [
 See docs/autoconf.html for more details on the options for Mesa.])

-AC_INIT([Mesa], [9.1.2],
+AC_INIT([Mesa], [9.1.4],
    [https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa])
 AC_CONFIG_AUX_DIR([bin])
 AC_CONFIG_MACRO_DIR([m4])
@@ -1059,26 +1059,24 @@ if test "x$enable_dri" = xyes; then
    DRI_DIRS=`echo "$DRI_DIRS" | $SED 's/  */ /g'`

    # Check for expat
-    if test "x$enable_dri" = xyes; then
-        EXPAT_INCLUDES=""
-        EXPAT_LIB=-lexpat
-        AC_ARG_WITH([expat],
-            [AS_HELP_STRING([--with-expat=DIR],
-                [expat install directory])],[
-            EXPAT_INCLUDES="-I$withval/include"
-            CPPFLAGS="$CPPFLAGS $EXPAT_INCLUDES"
-            LDFLAGS="$LDFLAGS -L$withval/$LIB_DIR"
-            EXPAT_LIB="-L$withval/$LIB_DIR -lexpat"
-            ])
-        AC_CHECK_HEADER([expat.h],[],[AC_MSG_ERROR([Expat required for DRI.])])
-	save_LIBS="$LIBS"
-        AC_CHECK_LIB([expat],[XML_ParserCreate],[],
-            [AC_MSG_ERROR([Expat required for DRI.])])
-	LIBS="$save_LIBS"
-    fi
+    EXPAT_INCLUDES=""
+    EXPAT_LIB=-lexpat
+    AC_ARG_WITH([expat],
+        [AS_HELP_STRING([--with-expat=DIR],
+            [expat install directory])],[
+        EXPAT_INCLUDES="-I$withval/include"
+        CPPFLAGS="$CPPFLAGS $EXPAT_INCLUDES"
+        LDFLAGS="$LDFLAGS -L$withval/$LIB_DIR"
+        EXPAT_LIB="-L$withval/$LIB_DIR -lexpat"
+        ])
+    AC_CHECK_HEADER([expat.h],[],[AC_MSG_ERROR([Expat required for DRI.])])
+    save_LIBS="$LIBS"
+    AC_CHECK_LIB([expat],[XML_ParserCreate],[],
+        [AC_MSG_ERROR([Expat required for DRI.])])
+    LIBS="$save_LIBS"

-    # if we are building any dri driver other than swrast or using the dri state tracker ...
-    if test -n "$DRI_DIRS" -a x"$DRI_DIRS" != xswrast || test "x$enable_dri" = xyes; then
+    # If we are building any DRI driver other than swrast.
+    if test -n "$DRI_DIRS" -a x"$DRI_DIRS" != xswrast; then
        # ... libdrm is required
        if test "x$have_libdrm" != xyes; then
            AC_MSG_ERROR([DRI drivers requires libdrm >= $LIBDRM_REQUIRED])
@@ -1146,14 +1144,6 @@ case $DRI_DIRS in
    ;;
 esac

-AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
-AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes)
-AM_CONDITIONAL(HAVE_NOUVEAU_DRI, test x$HAVE_NOUVEAU_DRI = xyes)
-AM_CONDITIONAL(HAVE_R200_DRI, test x$HAVE_R200_DRI = xyes)
-AM_CONDITIONAL(HAVE_RADEON_DRI, test x$HAVE_RADEON_DRI = xyes)
-AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
-AM_CONDITIONAL(HAVE_COMMON_DRI, test x$HAVE_COMMON_DRI = xyes)
-
 dnl
 dnl OSMesa configuration
 dnl
@@ -1752,6 +1742,7 @@ gallium_check_st() {
    fi
    if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then
         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $2"
+         HAVE_COMMON_DRI=yes
    fi
    if test "x$HAVE_ST_XORG" = xyes && test "x$3" != x; then
         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $3"
@@ -1877,6 +1868,7 @@ if test "x$with_gallium_drivers" != x; then

            if test "x$HAVE_ST_DRI" = xyes; then
                GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast"
+                HAVE_COMMON_DRI=yes
            fi
            if test "x$HAVE_ST_VDPAU" = xyes; then
                GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS vdpau-softpipe"
@@ -1993,6 +1985,14 @@ for driver in $GALLIUM_DRIVERS_DIRS; do
    esac
 done

+AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
+AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes)
+AM_CONDITIONAL(HAVE_NOUVEAU_DRI, test x$HAVE_NOUVEAU_DRI = xyes)
+AM_CONDITIONAL(HAVE_R200_DRI, test x$HAVE_R200_DRI = xyes)
+AM_CONDITIONAL(HAVE_RADEON_DRI, test x$HAVE_RADEON_DRI = xyes)
+AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
+AM_CONDITIONAL(HAVE_COMMON_DRI, test x$HAVE_COMMON_DRI = xyes)
+
 AM_CONDITIONAL(HAVE_GALAHAD_GALLIUM, test x$HAVE_GALAHAD_GALLIUM = xyes)
 AM_CONDITIONAL(HAVE_IDENTITY_GALLIUM, test x$HAVE_IDENTITY_GALLIUM = xyes)
 AM_CONDITIONAL(HAVE_NOOP_GALLIUM, test x$HAVE_NOOP_GALLIUM = xyes)
--- a/docs/relnotes-9.1.2.html
+++ b/docs/relnotes-9.1.2.html
@@ -30,7 +30,9 @@ because GL_ARB_compatibility is not supported.

 <h2>MD5 checksums</h2>
 <pre>
-TBD
+df2aab86ff4a510ce5b0d074caa0a59f  MesaLib-9.1.2.tar.bz2
+415c2bc3a9eb571aafbfa474ebf5a2e0  MesaLib-9.1.2.tar.gz
+b1ae5a4d9255953980bc9254f5323420  MesaLib-9.1.2.zip
 </pre>

 <h2>New features</h2>
--- a/docs/relnotes-9.1.3.html
+++ b/docs/relnotes-9.1.3.html
@@ -0,0 +1,230 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 9.1.3 Release Notes / May 21st, 2013</h1>
+
+<p>
+Mesa 9.1.3 is a bug fix release which fixes bugs found since the 9.1.2 release.
+</p>
+<p>
+Mesa 9.1 implements the OpenGL 3.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.1.  OpenGL
+3.1 is <strong>only</strong> available if requested at context creation
+because GL_ARB_compatibility is not supported.
+</p>
+
+<h2>MD5 checksums</h2>
+<pre>
+952ccd03547ed72333b64e1746cf8ada  MesaLib-9.1.3.tar.bz2
+26d2f1aa8e9db388d51fcbd163c61fb7  MesaLib-9.1.3.tar.gz
+7017b7bdf0ebfd39a5c46cee7cf6b567  MesaLib-9.1.3.zip
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=39251">Bug 39251</a> - Second Life viewers from release 2.7.4.235167 to the last  3.4.0.264911 crash on start.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=47478">Bug 47478</a> - [wine] GLX_DONT_CARE does not work for GLX_DRAWABLE_TYPE or GLX_RENDER_TYPE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=56416">Bug 56416</a> - [SNB bisected] SNB hang with rc6 and hiz on glxgears (and other GL apps) immediately after xinit.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=57436">Bug 57436</a> - [GLSL1.40 IVB/HSW]Piglit spec/glsl-1.40/compiler_built-in-functions/inverse-mat2.frag fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61554">Bug 61554</a> - [ivb] Mesa 9.1 performance regression on KWin's Lanczos shader</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61773">Bug 61773</a> - abort is an incredibly not-smart way to handle IR validation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=62868">Bug 62868</a> - solaris build broken with missing ffsll</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=62999">Bug 62999</a> - glXChooseFBConfig with GLX_DRAWABLE_TYPE, GLX_DONT_CARE fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63078">Bug 63078</a> - EGL X11 Regression: Maximum swap interval is 0 (worked with 9.0)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63447">Bug 63447</a> - [i965 Bisected]Ogles1conform/Ogles2conform/Ogles3conform cases segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64662">Bug 64662</a> - [SNB 9.1 Bisected]Ogles2conform GL2ExtensionTests/depth_texture_cube_map/depth_texture_cube_map.test fail</li>
+
+</ul>
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-9.1.2..mesa-9.1.3
+</pre>
+
+<p>Alex Deucher (2):</p>
+<ul>
+  <li>r600g: add new richland pci ids</li>
+  <li>radeonsi: add new SI pci ids</li>
+</ul>
+
+<p>Alexander Monakov (1):</p>
+<ul>
+  <li>Honor GLX_DONT_CARE in MATCH_MASK</li>
+</ul>
+
+<p>Andreas Boll (2):</p>
+<ul>
+  <li>mesa: Add a script to generate the list of fixed bugs</li>
+  <li>mesa: add usage examples to get-pick-list and shortlog scripts</li>
+</ul>
+
+<p>Aras Pranckevicius (1):</p>
+<ul>
+  <li>GLSL: fix lower_jumps to report progress properly</li>
+</ul>
+
+<p>Brian Paul (3):</p>
+<ul>
+  <li>mesa: remove platform checks around __builtin_ffs, __builtin_ffsll</li>
+  <li>gallium/u_blitter: fix is_blit_generic_supported() stencil checking</li>
+  <li>mesa: enable GL_ARB_texture_float if TEXTURE_FLOAT_ENABLED is defined</li>
+</ul>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>egl/dri2: Fix min/max swap interval of configs</li>
+  <li>intel: Allocate hiz in intel_renderbuffer_move_to_temp()</li>
+</ul>
+
+<p>Chris Forbes (2):</p>
+<ul>
+  <li>i965/fs: Don't try to use bogus interpolation modes pre-Gen6.</li>
+  <li>mesa: don't memcmp() off the end of a cache key.</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>st/mesa: fix UBO offsets.</li>
+  <li>ralloc: don't write to memory in case of alloc fail.</li>
+</ul>
+
+<p>Eric Anholt (11):</p>
+<ul>
+  <li>i965/fs: Remove creation of a MOV instruction that's never used.</li>
+  <li>i965/fs: Move varying uniform offset compuation into the helper func.</li>
+  <li>i965: Make the constant surface interface take a normal byte size.</li>
+  <li>i965/fs: Avoid inappropriate optimization with regs_written &gt; 1.</li>
+  <li>i965/fs: Do CSE on gen7's varying-index pull constant loads.</li>
+  <li>i965/fs: Clean up the setup of gen4 simd16 message destinations.</li>
+  <li>i965/gen7: Skip resetting SOL offsets at batch start with HW contexts.</li>
+  <li>i965/gen6: Reduce updates of transform feedback offsets with HW contexts.</li>
+  <li>i965: Fix SNB GPU hangs when a blorp batch is the first thing to execute.</li>
+  <li>i965: Fix hangs on HSW since the gen6 blorp fix.</li>
+  <li>i965: Disable write masking when setting up texturing m0.</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>ACTIVE_UNIFORM_MAX_LENGTH should include 3 extra characters for arrays.</li>
+</ul>
+
+<p>Ian Romanick (11):</p>
+<ul>
+  <li>docs: Add 9.1.2 release md5sums</li>
+  <li>mesa: Note that patch 0967c36 shouldn't actually get picked to the 9.1 branch</li>
+  <li>mesa: NULL check the pointer before trying to dereference it</li>
+  <li>egl/dri2: NULL check value returned by dri2_create_surface</li>
+  <li>mesa: Don't leak shared state when context initialization fails</li>
+  <li>mesa: Don't leak gl_context::BeginEnd at context destruction</li>
+  <li>mesa/swrast: Refactor no-memory error checking in blit_linear</li>
+  <li>mesa/swrast: Move free calls outside the attachment loop</li>
+  <li>intel: Don't dereference a NULL pointer of calloc fails</li>
+  <li>mesa: Note that a824692 is already back ported</li>
+  <li>mesa: Bump version to 9.1.3</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>winsys/sw/xlib: Prevent shared memory segment leakage.</li>
+</ul>
+
+<p>Kenneth Graunke (9):</p>
+<ul>
+  <li>mesa: Add new ctx-&gt;Stencil._WriteEnabled derived state flag.</li>
+  <li>i965: Fix stencil write enable flag in 3DSTATE_DEPTH_BUFFER on Gen7+.</li>
+  <li>mesa: Fix unpack function for ETC2_SRGB8_PUNCHTHROUGH_ALPHA1.</li>
+  <li>mesa: Add an unpack function for ARGB2101010_UINT.</li>
+  <li>mesa: Add unpack functions for R/RG/RGB [U]INT8/16/32 formats.</li>
+  <li>mesa: Add unpack functions for A/I/L/LA [U]INT8/16/32 formats.</li>
+  <li>glsl: Ignore redundant prototypes after a function's been defined.</li>
+  <li>i965: Lower textureGrad() for samplerCubeShadow.</li>
+  <li>i965/vs: Fix textureGrad() with shadow samplers on Haswell.</li>
+</ul>
+
+<p>Maarten Lankhorst (1):</p>
+<ul>
+  <li>nvc0: Fix fd leak in nvc0_create_decoder</li>
+</ul>
+
+<p>Marek Olšák (5):</p>
+<ul>
+  <li>radeonsi: add more cases for copying unsupported formats to resource_copy_region</li>
+  <li>mesa: fix glGet queries depending on derived framebuffer state (v2)</li>
+  <li>gallium/u_blitter: implement buffer clearing</li>
+  <li>r600g: initialize CMASK and HTILE with the GPU using streamout</li>
+  <li>st/mesa: depth-stencil-alpha state also depends on _NEW_BUFFERS</li>
+</ul>
+
+<p>Martin Andersson (1):</p>
+<ul>
+  <li>r600g: Fix UMAD on Cayman</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>radeonsi: Handle arbitrary 2-byte formats in resource_copy_region</li>
+</ul>
+
+<p>Paul Berry (7):</p>
+<ul>
+  <li>glsl: Fix array indexing when constant folding built-in functions.</li>
+  <li>i965: Reduce code duplication in handling of depth, stencil, and HiZ.</li>
+  <li>glsl/linker: fix varying packing for non-flat integer varyings.</li>
+  <li>glsl: Document lower_packed_varyings' "flat" requirement with an assert.</li>
+  <li>glsl/linker: Adapt flat varying handling in preparation for geometry shaders.</li>
+  <li>glsl/linker: Reduce scope of non-flat integer varying fix.</li>
+  <li>intel: Do a depth resolve before copying images between miptrees.</li>
+</ul>
+
+<p>Ralf Jung (1):</p>
+<ul>
+  <li>egl/x11: Fix initialisation of swap_interval</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>gallivm: fix small but severe bug in handling multiple lod level strides</li>
+</ul>
+
+<p>Vadim Girlin (1):</p>
+<ul>
+  <li>gallium: handle drirc disable_glsl_line_continuations option</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes-9.1.4.html
+++ b/docs/relnotes-9.1.4.html
@@ -0,0 +1,319 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 9.1.4 Release Notes / July 1st, 2013</h1>
+
+<p>
+Mesa 9.1.4 is a bug fix release which fixes bugs found since the 9.1.3 release.
+</p>
+<p>
+Mesa 9.1 implements the OpenGL 3.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.1.  OpenGL
+3.1 is <strong>only</strong> available if requested at context creation
+because GL_ARB_compatibility is not supported.
+</p>
+
+<h2>MD5 checksums</h2>
+<pre>
+TBD
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37871">Bug 37871</a> - [bisected i965] Bus error (core dumped) on oglc texdecaltile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=42182">Bug 42182</a> - egl/opengles1/tri_x11 renders wrong</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44958">Bug 44958</a> - [SNB IVB HSW] mesa demo test texleak bus error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=53494">Bug 53494</a> - [snb] crash in texsubimage to a large atlas in clutter</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=60518">Bug 60518</a> - glDrawElements segfault when compiled into display list</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61821">Bug 61821</a> - src/mesa/drivers/dri/common/xmlpool.h:96:29: fatal error: xmlpool/options.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63520">Bug 63520</a> - r300g regression (RV380): Strange rendering of light sources in Penumbra  (bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63701">Bug 63701</a> - [HSW] support new haswell graphics [8086:0a2e]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64727">Bug 64727</a> - [gm45, bisected] some piglit glsl 1.10 built-in-functions tests crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64745">Bug 64745</a> - [llvmpipe] SIGSEGV src/gallium/state_trackers/glx/xlib/glx_api.c:1374</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64934">Bug 64934</a> - [llvmpipe] SIGSEGV src/gallium/state_trackers/glx/xlib/glx_api.c:1363</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65173">Bug 65173</a> - segfault in _mesa_get_format_datatype and _mesa_get_color_read_type when state dumping with glretrace</li>
+
+</ul>
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-9.1.3..mesa-9.1.4
+</pre>
+
+<p>Alan Coopersmith (2):</p>
+<ul>
+  <li>integer overflow in XF86DRIOpenConnection() [CVE-2013-1993 1/2]</li>
+  <li>integer overflow in XF86DRIGetClientDriverName() [CVE-2013-1993 2/2]</li>
+</ul>
+
+<p>Alex Deucher (3):</p>
+<ul>
+  <li>radeonsi: add support for hainan chips</li>
+  <li>radeonsi: add Hainan pci ids</li>
+  <li>winsys/radeon: add env var to disable VM on Cayman/Trinity</li>
+</ul>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>glapi: Add some missing static_dispatch="false" annotations to es_EXT.xml</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>intel: Add a null pointer check before dereferencing the pointer</li>
+</ul>
+
+<p>Armin K (1):</p>
+<ul>
+  <li>gallivm: Fix build with LLVM 3.3</li>
+</ul>
+
+<p>Brian Paul (9):</p>
+<ul>
+  <li>mesa: fix the compressed TexSubImage size checking code</li>
+  <li>st/mesa: generate GL_OUT_OF_MEMORY if we can't create the index buffer</li>
+  <li>mesa: fix error checking of DXT sRGB formats in _mesa_base_tex_format()</li>
+  <li>st/glx/xlib: check for null ctx pointer in glXIsDirect()</li>
+  <li>xlib: check for null ctx pointer in glXIsDirect()</li>
+  <li>st/glx: add null ctx check in glXDestroyContext()</li>
+  <li>xlib: add null ctx check in glXDestroyContext()</li>
+  <li>meta: move vertex array enables for mipmap generation</li>
+  <li>mesa: handle missing read buffer in _mesa_get_color_read_format/type()</li>
+</ul>
+
+<p>Bryan Cain (1):</p>
+<ul>
+  <li>nv50: initialize kick_notify callback in nv50_create</li>
+</ul>
+
+<p>Chad Versace (3):</p>
+<ul>
+  <li>egl/android: Fix error condition for EGL_ANDROID_image_native_buffer</li>
+  <li>i965: Fix glColorPointer(GL_FIXED)</li>
+  <li>intel: Return early if miptree allocation fails</li>
+</ul>
+
+<p>Chia-I Wu (1):</p>
+<ul>
+  <li>u_vbuf: fix index buffer leak</li>
+</ul>
+
+<p>Chris Forbes (8):</p>
+<ul>
+  <li>mesa: add accessor for effective stencil ref</li>
+  <li>intel: Use accessor for stencil reference values</li>
+  <li>nouveau: Use accessor for stencil reference values</li>
+  <li>radeon: Use accessor for stencil reference values</li>
+  <li>st: Use accessor for stencil reference values</li>
+  <li>swrast: Use accessor for stencil reference values</li>
+  <li>mesa: Stop clamping stencil reference value at specification time</li>
+  <li>mesa: Use accessor for stencil reference values in glGet</li>
+</ul>
+
+<p>Chí-Thanh Christopher Nguyễn (1):</p>
+<ul>
+  <li>targets/dri-i915: Force c++ linker in all cases</li>
+</ul>
+
+<p>Daniel Martin (1):</p>
+<ul>
+  <li>Fix build of swrast only without libdrm</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>i965: fix problem with constant out of bounds access (v3)</li>
+</ul>
+
+<p>Eric Anholt (10):</p>
+<ul>
+  <li>mesa: Make core Mesa allocate the texture renderbuffer wrapper.</li>
+  <li>mesa: Make gl_renderbuffers backed by EGL images use FinishRenderTexture.</li>
+  <li>i965/fs: Bake regs_written into the IR instead of recomputing it later.</li>
+  <li>i965/vs: Fix implied_mrf_writes() for integer division pre-gen6.</li>
+  <li>intel: Add support for writing to our linear-temporary-CPU-map case.</li>
+  <li>intel: Do temporary CPU maps of textures that are too big to GTT map.</li>
+  <li>intel: Avoid making tiled miptrees we won't be able to blit.</li>
+  <li>intel: Fix MRT handling of glBitmap().</li>
+  <li>intel: Fix format handling of blit glBitmap()</li>
+  <li>i965: Shut up the last release build warning.</li>
+</ul>
+
+<p>Fabian Bieler (2):</p>
+<ul>
+  <li>mesa/st: Don't copy propagate from swizzles.</li>
+  <li>mesa/program: Don't copy propagate from swizzles.</li>
+</ul>
+
+<p>Frank Henigman (1):</p>
+<ul>
+  <li>intel: initialize fs_visitor::params_remap in constructor</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>docs: Add 9.1.3 release md5sums</li>
+  <li>mesa: Bump version to 9.1.4</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>scons: Fix implicit python dependency discovery on Windows.</li>
+</ul>
+
+<p>Kenneth Graunke (17):</p>
+<ul>
+  <li>mesa: Add i965 varying index patches to .cherry-ignore.</li>
+  <li>i965: Turn brw-&gt;urb.vs_size and gs_size into local variables.</li>
+  <li>i965: Use a variable for the push constant size in kB.</li>
+  <li>i965: Update URB partitioning code for Haswell's GT3 variant.</li>
+  <li>i965: Add chipset limits for the Haswell GT3 variant.</li>
+  <li>i965: Enable the Bay Trail platform.</li>
+  <li>mesa: Add a reverted commit to cherry-ignore.</li>
+  <li>vbo: Ignore PRIMITIVE_RESTART_FIXED_INDEX for glDrawArrays().</li>
+  <li>mesa: Add a helper function for determining the restart index.</li>
+  <li>vbo: Use the new primitive restart index helper function.</li>
+  <li>i965: Use the correct restart index for fixed index mode on Haswell.</li>
+  <li>mesa: Cherry-ignore a patch that got picked but squashed.</li>
+  <li>i965: Fix can_cut_index_handle_restart_index() for byte/short types.</li>
+  <li>st/mesa: Go back to using ctx-&gt;Array.RestartIndex, not _RestartIndex.</li>
+  <li>mesa: Ignore fixed-index primitive restart in ArrayElement().</li>
+  <li>mesa: Delete the ctx-&gt;Array._RestartIndex derived state.</li>
+  <li>glsl: Bail on parsing if the #version directive is bogus.</li>
+</ul>
+
+<p>Lauri Kasanen (1):</p>
+<ul>
+  <li>r600g: Correctly initialize the shader key, v2</li>
+</ul>
+
+<p>Maarten Lankhorst (4):</p>
+<ul>
+  <li>nvc0: fix up video buffer alignment requirements</li>
+  <li>nvc0: kill assert in ppp code</li>
+  <li>nvc0: set rsvd_kick correctly</li>
+  <li>nvc0: allow frame dropping in h264</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+  <li>radeonsi: increase array size for shader inputs and outputs</li>
+  <li>vbo: fix possible use-after-free segfault after a VAO is deleted</li>
+  <li>glsl: fix the value of gl_MaxFragmentUniformVectors</li>
+  <li>st/mesa: initialize all program constants and UBO limits</li>
+  <li>st/mesa: initialize Const.MaxColorAttachments</li>
+  <li>st/mesa: fix a couple of issues in st_bind_ubos</li>
+  <li>mesa: declare UniformBufferBindings as an array with a static size</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>configure.ac: Remove redundant checks of enable_dri.</li>
+  <li>configure.ac: Build dricommon for DRI gallium drivers</li>
+  <li>i965: NULL check depth_mt to quiet static analysis.</li>
+</ul>
+
+<p>Michel Dänzer (3):</p>
+<ul>
+  <li>radeonsi: Fix handling of TGSI_SEMANTIC_PSIZE</li>
+  <li>radeonsi: Fix user clip planes</li>
+  <li>mesa: Note that two radeonsi fixes cannot be backported after all</li>
+</ul>
+
+<p>Mike Stroyan (1):</p>
+<ul>
+  <li>configure.ac: Build dricommon for gallium swrast</li>
+</ul>
+
+<p>Naohiro Aota (1):</p>
+<ul>
+  <li>xmlpool/build: Make sure to set mo properly</li>
+</ul>
+
+<p>Paul Berry (2):</p>
+<ul>
+  <li>glsl: Fix error checking on "flat" keyword to match GLSL ES 3.00, GLSL 1.50.</li>
+  <li>i965/gen7.5: Allow HW primitive restart for all primitive types.</li>
+</ul>
+
+<p>Paulo Zanoni (1):</p>
+<ul>
+  <li>i965: make GT3 machines work as GT3 instead of GT2</li>
+</ul>
+
+<p>Rodrigo Vivi (2):</p>
+<ul>
+  <li>i965: Add missing Haswell GT3 Desktop to IS_HSW_GT3 check.</li>
+  <li>i965: Adding more reserved PCI IDs for Haswell.</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>gallivm: fix out-of-bounds access with mirror_clamp_to_edge address mode</li>
+</ul>
+
+<p>Stéphane Marchesin (2):</p>
+<ul>
+  <li>st/xlib: Fix upside down coordinates for CopySubBuffer</li>
+  <li>st/xlib: Flush the front buffer before doing CopySubBuffer</li>
+</ul>
+
+<p>Sven Joachim (1):</p>
+<ul>
+  <li>mesa: Fix ieee fp on Alpha</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: fix type comparison errors in sub-texture error checking code</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>gallivm: Fix build with LLVM &gt;= r180063</li>
+  <li>r300g/compiler: Prevent regalloc from swizzling texture operands v2</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>radeon: Initialize variables in radeon_llvm_context_init.</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -28,37 +28,66 @@ CHIPSET(0x015a, IVYBRIDGE_S_GT1, ivb_gt1)
 CHIPSET(0x016a, IVYBRIDGE_S_GT2, ivb_gt2)
 CHIPSET(0x0402, HASWELL_GT1, hsw_gt1)
 CHIPSET(0x0412, HASWELL_GT2, hsw_gt2)
-CHIPSET(0x0422, HASWELL_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0422, HASWELL_GT3, hsw_gt3)
 CHIPSET(0x0406, HASWELL_M_GT1, hsw_gt1)
 CHIPSET(0x0416, HASWELL_M_GT2, hsw_gt2)
-CHIPSET(0x0426, HASWELL_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0426, HASWELL_M_GT3, hsw_gt3)
 CHIPSET(0x040A, HASWELL_S_GT1, hsw_gt1)
 CHIPSET(0x041A, HASWELL_S_GT2, hsw_gt2)
-CHIPSET(0x042A, HASWELL_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x042A, HASWELL_S_GT3, hsw_gt3)
+CHIPSET(0x040B, HASWELL_B_GT1, hsw_gt1)
+CHIPSET(0x041B, HASWELL_B_GT2, hsw_gt2)
+CHIPSET(0x042B, HASWELL_B_GT3, hsw_gt3)
+CHIPSET(0x040E, HASWELL_E_GT1, hsw_gt1)
+CHIPSET(0x041E, HASWELL_E_GT2, hsw_gt2)
+CHIPSET(0x042E, HASWELL_E_GT3, hsw_gt3)
 CHIPSET(0x0C02, HASWELL_SDV_GT1, hsw_gt1)
 CHIPSET(0x0C12, HASWELL_SDV_GT2, hsw_gt2)
-CHIPSET(0x0C22, HASWELL_SDV_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0C22, HASWELL_SDV_GT3, hsw_gt3)
 CHIPSET(0x0C06, HASWELL_SDV_M_GT1, hsw_gt1)
 CHIPSET(0x0C16, HASWELL_SDV_M_GT2, hsw_gt2)
-CHIPSET(0x0C26, HASWELL_SDV_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0C26, HASWELL_SDV_M_GT3, hsw_gt3)
 CHIPSET(0x0C0A, HASWELL_SDV_S_GT1, hsw_gt1)
 CHIPSET(0x0C1A, HASWELL_SDV_S_GT2, hsw_gt2)
-CHIPSET(0x0C2A, HASWELL_SDV_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0C2A, HASWELL_SDV_S_GT3, hsw_gt3)
+CHIPSET(0x0C0B, HASWELL_SDV_B_GT1, hsw_gt1)
+CHIPSET(0x0C1B, HASWELL_SDV_B_GT2, hsw_gt2)
+CHIPSET(0x0C2B, HASWELL_SDV_B_GT3, hsw_gt3)
+CHIPSET(0x0C0E, HASWELL_SDV_E_GT1, hsw_gt1)
+CHIPSET(0x0C1E, HASWELL_SDV_E_GT2, hsw_gt2)
+CHIPSET(0x0C2E, HASWELL_SDV_E_GT3, hsw_gt3)
 CHIPSET(0x0A02, HASWELL_ULT_GT1, hsw_gt1)
 CHIPSET(0x0A12, HASWELL_ULT_GT2, hsw_gt2)
-CHIPSET(0x0A22, HASWELL_ULT_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0A22, HASWELL_ULT_GT3, hsw_gt3)
 CHIPSET(0x0A06, HASWELL_ULT_M_GT1, hsw_gt1)
 CHIPSET(0x0A16, HASWELL_ULT_M_GT2, hsw_gt2)
-CHIPSET(0x0A26, HASWELL_ULT_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0A26, HASWELL_ULT_M_GT3, hsw_gt3)
 CHIPSET(0x0A0A, HASWELL_ULT_S_GT1, hsw_gt1)
 CHIPSET(0x0A1A, HASWELL_ULT_S_GT2, hsw_gt2)
-CHIPSET(0x0A2A, HASWELL_ULT_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0A2A, HASWELL_ULT_S_GT3, hsw_gt3)
+CHIPSET(0x0A0B, HASWELL_ULT_B_GT1, hsw_gt1)
+CHIPSET(0x0A1B, HASWELL_ULT_B_GT2, hsw_gt2)
+CHIPSET(0x0A2B, HASWELL_ULT_B_GT3, hsw_gt3)
+CHIPSET(0x0A0E, HASWELL_ULT_E_GT1, hsw_gt1)
+CHIPSET(0x0A1E, HASWELL_ULT_E_GT2, hsw_gt2)
+CHIPSET(0x0A2E, HASWELL_ULT_E_GT3, hsw_gt3)
 CHIPSET(0x0D02, HASWELL_CRW_GT1, hsw_gt1)
 CHIPSET(0x0D12, HASWELL_CRW_GT2, hsw_gt2)
-CHIPSET(0x0D22, HASWELL_CRW_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D22, HASWELL_CRW_GT3, hsw_gt3)
 CHIPSET(0x0D06, HASWELL_CRW_M_GT1, hsw_gt1)
 CHIPSET(0x0D16, HASWELL_CRW_M_GT2, hsw_gt2)
-CHIPSET(0x0D26, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D26, HASWELL_CRW_M_GT3, hsw_gt3)
 CHIPSET(0x0D0A, HASWELL_CRW_S_GT1, hsw_gt1)
 CHIPSET(0x0D1A, HASWELL_CRW_S_GT2, hsw_gt2)
-CHIPSET(0x0D2A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D2A, HASWELL_CRW_S_GT3, hsw_gt3)
+CHIPSET(0x0D0B, HASWELL_CRW_B_GT1, hsw_gt1)
+CHIPSET(0x0D1B, HASWELL_CRW_B_GT2, hsw_gt2)
+CHIPSET(0x0D2B, HASWELL_CRW_B_GT3, hsw_gt3)
+CHIPSET(0x0D0E, HASWELL_CRW_E_GT1, hsw_gt1)
+CHIPSET(0x0D1E, HASWELL_CRW_E_GT2, hsw_gt2)
+CHIPSET(0x0D2E, HASWELL_CRW_E_GT3, hsw_gt3)
+CHIPSET(0x0F31, BAYTRAIL_M_1, byt)
+CHIPSET(0x0F32, BAYTRAIL_M_2, byt)
+CHIPSET(0x0F33, BAYTRAIL_M_3, byt)
+CHIPSET(0x0157, BAYTRAIL_M_4, byt)
+CHIPSET(0x0155, BAYTRAIL_D, byt)
--- a/include/pci_ids/r600_pci_ids.h
+++ b/include/pci_ids/r600_pci_ids.h
@@ -320,6 +320,8 @@ CHIPSET(0x9998, ARUBA_9998, ARUBA)
 CHIPSET(0x9999, ARUBA_9999, ARUBA)
 CHIPSET(0x999A, ARUBA_999A, ARUBA)
 CHIPSET(0x999B, ARUBA_999B, ARUBA)
+CHIPSET(0x999C, ARUBA_999C, ARUBA)
+CHIPSET(0x999D, ARUBA_999D, ARUBA)
 CHIPSET(0x99A0, ARUBA_99A0, ARUBA)
 CHIPSET(0x99A2, ARUBA_99A2, ARUBA)
 CHIPSET(0x99A4, ARUBA_99A4, ARUBA)
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -28,6 +28,7 @@ CHIPSET(0x684C, PITCAIRN_684C, PITCAIRN)

 CHIPSET(0x6820, VERDE_6820, VERDE)
 CHIPSET(0x6821, VERDE_6821, VERDE)
+CHIPSET(0x6822, VERDE_6822, VERDE)
 CHIPSET(0x6823, VERDE_6823, VERDE)
 CHIPSET(0x6824, VERDE_6824, VERDE)
 CHIPSET(0x6825, VERDE_6825, VERDE)
@@ -35,11 +36,13 @@ CHIPSET(0x6826, VERDE_6826, VERDE)
 CHIPSET(0x6827, VERDE_6827, VERDE)
 CHIPSET(0x6828, VERDE_6828, VERDE)
 CHIPSET(0x6829, VERDE_6829, VERDE)
+CHIPSET(0x682A, VERDE_682A, VERDE)
 CHIPSET(0x682B, VERDE_682B, VERDE)
 CHIPSET(0x682D, VERDE_682D, VERDE)
 CHIPSET(0x682F, VERDE_682F, VERDE)
 CHIPSET(0x6830, VERDE_6830, VERDE)
 CHIPSET(0x6831, VERDE_6831, VERDE)
+CHIPSET(0x6835, VERDE_6835, VERDE)
 CHIPSET(0x6837, VERDE_6837, VERDE)
 CHIPSET(0x6838, VERDE_6838, VERDE)
 CHIPSET(0x6839, VERDE_6839, VERDE)
@@ -60,3 +63,10 @@ CHIPSET(0x6620, OLAND_6620, OLAND)
 CHIPSET(0x6621, OLAND_6621, OLAND)
 CHIPSET(0x6623, OLAND_6623, OLAND)
 CHIPSET(0x6631, OLAND_6631, OLAND)
+
+CHIPSET(0x6660, HAINAN_6660, HAINAN)
+CHIPSET(0x6663, HAINAN_6663, HAINAN)
+CHIPSET(0x6664, HAINAN_6664, HAINAN)
+CHIPSET(0x6665, HAINAN_6665, HAINAN)
+CHIPSET(0x6667, HAINAN_6667, HAINAN)
+CHIPSET(0x666F, HAINAN_666F, HAINAN)
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -95,7 +95,7 @@ def createConvenienceLibBuilder(env):

 # TODO: handle import statements with multiple modules
 # TODO: handle from import statements
-import_re = re.compile(r'^import\s+(\S+)$', re.M)
+import_re = re.compile(r'^\s*import\s+(\S+)\s*$', re.M)

 def python_scan(node, env, path):
    # http://www.scons.org/doc/0.98.5/HTML/scons-user/c2781.html#AEN2789
@@ -113,6 +113,7 @@ def python_scan(node, env, path):
            if os.path.exists(file):
                results.append(env.File(file))
                break
+    #print node, map(str, results)
    return results

 python_scanner = SCons.Scanner.Scanner(function = python_scan, skeys = ['.py'])
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -221,6 +221,9 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
   base.RenderableType = disp->ClientAPIs;
   base.Conformant = disp->ClientAPIs;

+   base.MinSwapInterval = dri2_dpy->min_swap_interval;
+   base.MaxSwapInterval = dri2_dpy->max_swap_interval;
+
   if (!_eglValidateConfig(&base, EGL_FALSE)) {
      _eglLog(_EGL_DEBUG, "DRI2: failed to validate config %d", id);
      return NULL;
@@ -268,9 +271,6 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,

   if (double_buffer) {
      surface_type &= ~EGL_PIXMAP_BIT;
-
-      conf->base.MinSwapInterval = dri2_dpy->min_swap_interval;
-      conf->base.MaxSwapInterval = dri2_dpy->max_swap_interval;
   }

   conf->base.SurfaceType |= surface_type;
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -338,7 +338,7 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
 }

 static _EGLImage *
-dri2_create_image_android_native_buffer(_EGLDisplay *disp,
+dri2_create_image_android_native_buffer(_EGLDisplay *disp, _EGLContext *ctx,
                                        struct ANativeWindowBuffer *buf)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
@@ -346,6 +346,18 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp,
   int name;
   EGLint format;

+   if (ctx != NULL) {
+      /* From the EGL_ANDROID_image_native_buffer spec:
+       *
+       *     * If <target> is EGL_NATIVE_BUFFER_ANDROID and <ctx> is not
+       *       EGL_NO_CONTEXT, the error EGL_BAD_CONTEXT is generated.
+       */
+      _eglError(EGL_BAD_CONTEXT, "eglCreateEGLImageKHR: for "
+                "EGL_NATIVE_BUFFER_ANDROID, the context must be "
+                "EGL_NO_CONTEXT");
+      return NULL;
+   }
+
   if (!buf || buf->common.magic != ANDROID_NATIVE_BUFFER_MAGIC ||
       buf->common.version != sizeof(*buf)) {
      _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
@@ -417,7 +429,7 @@ droid_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
 {
   switch (target) {
   case EGL_NATIVE_BUFFER_ANDROID:
-      return dri2_create_image_android_native_buffer(disp,
+      return dri2_create_image_android_native_buffer(disp, ctx,
            (struct ANativeWindowBuffer *) buffer);
   default:
      return dri2_create_image_khr(drv, disp, ctx, target, buffer, attr_list);
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -284,14 +284,15 @@ dri2_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,

   surf = dri2_create_surface(drv, disp, EGL_WINDOW_BIT, conf,
                              window, attrib_list);
+   if (surf != NULL) {
+      /* When we first create the DRI2 drawable, its swap interval on the
+       * server side is 1.
+       */
+      surf->SwapInterval = 1;

-   /* When we first create the DRI2 drawable, its swap interval on the server
-    * side is 1.
-    */
-   surf->SwapInterval = 1;
-
-   /* Override that with a driconf-set value. */
-   drv->API.SwapInterval(drv, disp, surf, dri2_dpy->default_swap_interval);
+      /* Override that with a driconf-set value. */
+      drv->API.SwapInterval(drv, disp, surf, dri2_dpy->default_swap_interval);
+   }

   return surf;
 }
@@ -1162,6 +1163,8 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
   if (!dri2_create_screen(disp))
      goto cleanup_fd;

+   dri2_setup_swap_interval(dri2_dpy);
+
   if (dri2_dpy->conn) {
      if (!dri2_add_configs_for_visuals(dri2_dpy, disp))
 	 goto cleanup_configs;
@@ -1181,8 +1184,6 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
   disp->VersionMajor = 1;
   disp->VersionMinor = 4;

-   dri2_setup_swap_interval(dri2_dpy);
-
   return EGL_TRUE;

 cleanup_configs:
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -55,6 +55,10 @@
 #include <llvm/MC/MCRegisterInfo.h>
 #endif /* HAVE_LLVM >= 0x0301 */

+#if HAVE_LLVM >= 0x0303
+#include <llvm/ADT/OwningPtr.h>
+#endif
+
 #include "util/u_math.h"
 #include "util/u_debug.h"

--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -60,6 +60,12 @@
 #include <llvm/Target/TargetSelect.h>
 #endif /* HAVE_LLVM < 0x0300 */

+#if HAVE_LLVM >= 0x0303
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Support/CBindingWrapping.h>
+#endif
+
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -867,7 +867,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
      stride = bld->int_coord_bld.undef;
      for (i = 0; i < bld->num_lods; i++) {
         LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
-         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, i);
+         LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
         indexes[1] = LLVMBuildExtractElement(builder, level, indexi, "");
         stride1 = LLVMBuildGEP(builder, stride_array, indexes, 2, "");
         stride1 = LLVMBuildLoad(builder, stride1, "");
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -406,7 +406,6 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
-         LLVMValueRef min, max;
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;
         coord = lp_build_abs(coord_bld, coord);
@@ -416,16 +415,18 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
            coord = lp_build_mul(coord_bld, coord, length_f);
         }

-         /* clamp to [0.5, length - 0.5] */
-         min = half;
-         max = lp_build_sub(coord_bld, length_f, min);
-         coord = lp_build_clamp(coord_bld, coord, min, max);
-
+         /* clamp to at most length */
+         coord = lp_build_min(coord_bld, coord, length_f);
+         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
+         /* clamp to [0, length - 0.5] */
+         coord = lp_build_max(coord_bld, coord, coord_bld->zero);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+         /* coord1 = min(coord1, length-1) */
+         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      }
      break;

--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -100,7 +100,7 @@ struct blitter_context_priv
   void *velem_state;
   void *velem_uint_state;
   void *velem_sint_state;
-   void *velem_state_readbuf;
+   void *velem_state_readbuf[4]; /**< X, XY, XYZ, XYZW */

   /* Sampler state. */
   void *sampler_state, *sampler_state_linear;
@@ -277,9 +277,19 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
   }

   if (ctx->has_stream_out) {
-      velem[0].src_format = PIPE_FORMAT_R32_UINT;
-      velem[0].vertex_buffer_index = ctx->base.vb_slot;
-      ctx->velem_state_readbuf = pipe->create_vertex_elements_state(pipe, 1, &velem[0]);
+      static enum pipe_format formats[4] = {
+         PIPE_FORMAT_R32_UINT,
+         PIPE_FORMAT_R32G32_UINT,
+         PIPE_FORMAT_R32G32B32_UINT,
+         PIPE_FORMAT_R32G32B32A32_UINT
+      };
+
+      for (i = 0; i < 4; i++) {
+         velem[0].src_format = formats[i];
+         velem[0].vertex_buffer_index = ctx->base.vb_slot;
+         ctx->velem_state_readbuf[i] =
+               pipe->create_vertex_elements_state(pipe, 1, &velem[0]);
+      }
   }

   /* fragment shaders are created on-demand */
@@ -344,8 +354,11 @@ void util_blitter_destroy(struct blitter_context *blitter)
      pipe->delete_vertex_elements_state(pipe, ctx->velem_sint_state);
      pipe->delete_vertex_elements_state(pipe, ctx->velem_uint_state);
   }
-   if (ctx->velem_state_readbuf)
-      pipe->delete_vertex_elements_state(pipe, ctx->velem_state_readbuf);
+   for (i = 0; i < 4; i++) {
+      if (ctx->velem_state_readbuf[i]) {
+         pipe->delete_vertex_elements_state(pipe, ctx->velem_state_readbuf[i]);
+      }
+   }

   for (i = 0; i < PIPE_MAX_TEXTURE_TYPES; i++) {
      if (ctx->fs_texfetch_col[i])
@@ -1120,18 +1133,17 @@ static boolean is_blit_generic_supported(struct blitter_context *blitter,

   if (dst) {
      unsigned bind;
-      boolean is_stencil;
      const struct util_format_description *desc =
            util_format_description(dst_format);
-
-      is_stencil = util_format_has_stencil(desc);
+      boolean dst_has_stencil = util_format_has_stencil(desc);

      /* Stencil export must be supported for stencil copy. */
-      if ((mask & PIPE_MASK_S) && is_stencil && !ctx->has_stencil_export) {
+      if ((mask & PIPE_MASK_S) && dst_has_stencil &&
+          !ctx->has_stencil_export) {
         return FALSE;
      }

-      if (is_stencil || util_format_has_depth(desc))
+      if (dst_has_stencil || util_format_has_depth(desc))
         bind = PIPE_BIND_DEPTH_STENCIL;
      else
         bind = PIPE_BIND_RENDER_TARGET;
@@ -1153,15 +1165,18 @@ static boolean is_blit_generic_supported(struct blitter_context *blitter,
      }

      /* Check stencil sampler support for stencil copy. */
-      if (util_format_has_stencil(util_format_description(src_format))) {
-         enum pipe_format stencil_format =
+      if (mask & PIPE_MASK_S) {
+         if (util_format_has_stencil(util_format_description(src_format))) {
+            enum pipe_format stencil_format =
               util_format_stencil_only(src_format);
-         assert(stencil_format != PIPE_FORMAT_NONE);
+            assert(stencil_format != PIPE_FORMAT_NONE);

-         if (stencil_format != src_format &&
-             !screen->is_format_supported(screen, stencil_format, src->target,
-                                 src->nr_samples, PIPE_BIND_SAMPLER_VIEW)) {
-            return FALSE;
+            if (stencil_format != src_format &&
+                !screen->is_format_supported(screen, stencil_format,
+                                             src->target, src->nr_samples,
+                                             PIPE_BIND_SAMPLER_VIEW)) {
+               return FALSE;
+            }
         }
      }
   }
@@ -1714,7 +1729,7 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
   vb.stride = 4;

   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
-   pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf);
+   pipe->bind_vertex_elements_state(pipe, ctx->velem_state_readbuf[0]);
   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
   if (ctx->has_geometry_shader)
      pipe->bind_gs_state(pipe, NULL);
@@ -1731,6 +1746,66 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
   pipe_so_target_reference(&so_target, NULL);
 }

+void util_blitter_clear_buffer(struct blitter_context *blitter,
+                               struct pipe_resource *dst,
+                               unsigned offset, unsigned size,
+                               unsigned num_channels,
+                               const union pipe_color_union *clear_value)
+{
+   struct blitter_context_priv *ctx = (struct blitter_context_priv*)blitter;
+   struct pipe_context *pipe = ctx->base.pipe;
+   struct pipe_vertex_buffer vb = {0};
+   struct pipe_stream_output_target *so_target;
+
+   assert(num_channels >= 1);
+   assert(num_channels <= 4);
+
+   /* IMPORTANT:  DON'T DO ANY BOUNDS CHECKING HERE!
+    *
+    * R600 uses this to initialize texture resources, so width0 might not be
+    * what you think it is.
+    */
+
+   /* Streamout is required. */
+   if (!ctx->has_stream_out) {
+      assert(!"Streamout unsupported in util_blitter_clear_buffer()");
+      return;
+   }
+
+   /* Some alignment is required. */
+   if (offset % 4 != 0 || size % 4 != 0) {
+      assert(!"Bad alignment in util_blitter_clear_buffer()");
+      return;
+   }
+
+   u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
+                 &vb.buffer_offset, &vb.buffer);
+   vb.stride = 0;
+
+   blitter_set_running_flag(ctx);
+   blitter_check_saved_vertex_states(ctx);
+   blitter_disable_render_cond(ctx);
+
+   pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
+   pipe->bind_vertex_elements_state(pipe,
+                                    ctx->velem_state_readbuf[num_channels-1]);
+   pipe->bind_vs_state(pipe, ctx->vs_pos_only);
+   if (ctx->has_geometry_shader)
+      pipe->bind_gs_state(pipe, NULL);
+   pipe->bind_rasterizer_state(pipe, ctx->rs_discard_state);
+
+   so_target = pipe->create_stream_output_target(pipe, dst, offset, size);
+   pipe->set_stream_output_targets(pipe, 1, &so_target, 0);
+
+   util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
+
+   blitter_restore_vertex_states(ctx);
+   blitter_restore_render_cond(ctx);
+   blitter_unset_running_flag(ctx);
+   pipe_so_target_reference(&so_target, NULL);
+   pipe_resource_reference(&vb.buffer, NULL);
+}
+
 /* probably radeon specific */
 void util_blitter_custom_resolve_color(struct blitter_context *blitter,
 				       struct pipe_resource *dst,
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -276,7 +276,7 @@ void util_blitter_default_src_texture(struct pipe_sampler_view *src_templ,

 /**
 * Copy data from one buffer to another using the Stream Output functionality.
- * Some alignment is required, otherwise software fallback is used.
+ * 4-byte alignment is required, otherwise software fallback is used.
 */
 void util_blitter_copy_buffer(struct blitter_context *blitter,
                              struct pipe_resource *dst,
@@ -285,6 +285,22 @@ void util_blitter_copy_buffer(struct blitter_context *blitter,
                              unsigned srcx,
                              unsigned size);

+/**
+ * Clear the contents of a buffer using the Stream Output functionality.
+ * 4-byte alignment is required.
+ *
+ * "num_channels" can be 1, 2, 3, or 4, and specifies whether the clear
+ * value is R, RG, RGB, or RGBA.
+ *
+ * For each element, only "num_channels" components of "clear_value" are
+ * copied to the buffer, then the offset is incremented by num_channels*4.
+ */
+void util_blitter_clear_buffer(struct blitter_context *blitter,
+                               struct pipe_resource *dst,
+                               unsigned offset, unsigned size,
+                               unsigned num_channels,
+                               const union pipe_color_union *clear_value);
+
 /**
 * Clear a region of a (color) surface to a constant value.
 *
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -307,6 +307,9 @@ void u_vbuf_destroy(struct u_vbuf *mgr)
   unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                              PIPE_SHADER_CAP_MAX_INPUTS);

+   mgr->pipe->set_index_buffer(mgr->pipe, NULL);
+   pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
+
   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -242,6 +242,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
      screen->cur_ctx = nv50;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
   }
+   nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;

   nv50_init_query_functions(nv50);
   nv50_init_surface_functions(nv50);
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -445,6 +445,7 @@ nvc0_screen_create(struct nouveau_device *dev)
   chan = screen->base.channel;
   push = screen->base.pushbuf;
   push->user_priv = screen;
+   push->rsvd_kick = 5;

   screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
--- a/src/gallium/drivers/nvc0/nvc0_video.c
+++ b/src/gallium/drivers/nvc0/nvc0_video.c
@@ -356,19 +356,19 @@ nvc0_create_decoder(struct pipe_context *context,
         goto fw_fail;
      }
      r = read(fd, dec->fw_bo->map, 0x4000);
+      close(fd);
+
      if (r < 0) {
         fprintf(stderr, "reading firmware file %s failed: %m\n", path);
         goto fw_fail;
      }

      if (r == 0x4000) {
-         close(fd);
         fprintf(stderr, "firmware file %s too large!\n", path);
         goto fw_fail;
      }

      if (r & 0xff) {
-         close(fd);
         fprintf(stderr, "firmware file %s wrong size!\n", path);
         goto fw_fail;
      }
@@ -558,8 +558,6 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
   buffer = CALLOC_STRUCT(nvc0_video_buffer);
   if (!buffer)
      return NULL;
-   assert(!(templat->height % 4));
-   assert(!(templat->width % 2));

   buffer->base.buffer_format = templat->buffer_format;
   buffer->base.context = pipe;
@@ -578,7 +576,7 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
   templ.format = PIPE_FORMAT_R8_UNORM;
   templ.width0 = buffer->base.width;
-   templ.height0 = buffer->base.height/2;
+   templ.height0 = (buffer->base.height + 1)/2;
   templ.flags = NVC0_RESOURCE_FLAG_VIDEO;
   templ.last_level = 0;
   templ.array_size = 1;
@@ -589,8 +587,8 @@ nvc0_video_buffer_create(struct pipe_context *pipe,

   templ.format = PIPE_FORMAT_R8G8_UNORM;
   buffer->num_planes = 2;
-   templ.width0 /= 2;
-   templ.height0 /= 2;
+   templ.width0 = (templ.width0 + 1) / 2;
+   templ.height0 = (templ.height0 + 1) / 2;
   for (i = 1; i < buffer->num_planes; ++i) {
      buffer->resources[i] = pipe->screen->resource_create(pipe->screen, &templ);
      if (!buffer->resources[i])
--- a/src/gallium/drivers/nvc0/nvc0_video_ppp.c
+++ b/src/gallium/drivers/nvc0/nvc0_video_ppp.c
@@ -62,8 +62,6 @@ nvc0_decoder_setup_ppp(struct nvc0_decoder *dec, struct nvc0_video_buffer *targe
   PUSH_DATA (push, in_addr + y2); // 70c
   PUSH_DATA (push, in_addr + cbcr); // 710
   PUSH_DATA (push, in_addr + cbcr2); // 714
-   assert(target->resources[0]->width0 >= 16 * dec_w);
-   assert(target->resources[0]->height0 >= dec->base.height/2);

   for (i = 0; i < 2; ++i) {
      struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
--- a/src/gallium/drivers/nvc0/nvc0_video_vp.c
+++ b/src/gallium/drivers/nvc0/nvc0_video_vp.c
@@ -185,8 +185,6 @@ nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffe
              (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
      if (target == refs[i])
         empty_spot = 0;
-      assert(!h264 ||
-             dec->refs[idx].last_used == seq - 1);

      if (dec->refs[idx].vidbuf != refs[i]) {
         debug_printf("%p is not a real ref\n", refs[i]);
@@ -338,7 +336,6 @@ nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec,
   unsigned ring, i, j = 0;
   assert(offsetof(struct h264_picparm_vp, u224) == 0x224);
   *is_ref = d->is_reference;
-   assert(!d->frame_num || dec->last_frame_num + 1 == d->frame_num || dec->last_frame_num == d->frame_num);
   dec->last_frame_num = d->frame_num;

   h->width = mb(dec->base.width);
--- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -383,6 +383,14 @@ static enum rc_reg_class variable_get_class(
 							RC_INSTRUCTION_PAIR ) {
 						old_swizzle = r.U.P.Arg->Swizzle;
 					} else {
+						/* Source operands of TEX
+						 * instructions can't be
+						 * swizzled on r300/r400 GPUs.
+						 */
+						if (!variable->C->is_r500) {
+							can_change_writemask = 0;
+							break;
+						}
 						old_swizzle = r.U.I.Src->Swizzle;
 					}
 					new_swizzle = rc_adjust_channels(
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -523,6 +523,37 @@ void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsig
 	}
 }

+static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+			      unsigned offset, unsigned size, unsigned char value)
+{
+	struct r600_context *rctx = (struct r600_context*)ctx;
+
+	if (rctx->screen->has_streamout && offset % 4 == 0 && size % 4 == 0) {
+		union pipe_color_union clear_value;
+		uint32_t v = value;
+
+		clear_value.ui[0] = v | (v << 8) | (v << 16) | (v << 24);
+
+		r600_blitter_begin(ctx, R600_DISABLE_RENDER_COND);
+		util_blitter_clear_buffer(rctx->blitter, dst, offset, size,
+					  1, &clear_value);
+		r600_blitter_end(ctx);
+	} else {
+		char *map = r600_buffer_mmap_sync_with_rings(rctx, r600_resource(dst),
+							     PIPE_TRANSFER_WRITE);
+		memset(map + offset, value, size);
+	}
+}
+
+void r600_screen_clear_buffer(struct r600_screen *rscreen, struct pipe_resource *dst,
+			      unsigned offset, unsigned size, unsigned char value)
+{
+	pipe_mutex_lock(rscreen->aux_context_lock);
+	r600_clear_buffer(rscreen->aux_context, dst, offset, size, value);
+	rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
+	pipe_mutex_unlock(rscreen->aux_context_lock);
+}
+
 static bool util_format_is_subsampled_2x1_32bpp(enum pipe_format format)
 {
 	const struct util_format_description *desc = util_format_description(format);
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -834,6 +834,9 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
 	if (rscreen == NULL)
 		return;

+	pipe_mutex_destroy(rscreen->aux_context_lock);
+	rscreen->aux_context->destroy(rscreen->aux_context);
+
 	if (rscreen->global_pool) {
 		compute_memory_pool_delete(rscreen->global_pool);
 	}
@@ -1176,5 +1179,41 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 	}
 #endif

+	/* Create the auxiliary context. */
+	pipe_mutex_init(rscreen->aux_context_lock);
+	rscreen->aux_context = rscreen->screen.context_create(&rscreen->screen, NULL);
+
+#if 0 /* This is for testing whether aux_context and buffer clearing work correctly. */
+	struct pipe_resource templ = {};
+
+	templ.width0 = 4;
+	templ.height0 = 2048;
+	templ.depth0 = 1;
+	templ.array_size = 1;
+	templ.target = PIPE_TEXTURE_2D;
+	templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+	templ.usage = PIPE_USAGE_STATIC;
+
+	struct r600_resource *res = r600_resource(rscreen->screen.resource_create(&rscreen->screen, &templ));
+	unsigned char *map = ws->buffer_map(res->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+
+	memset(map, 0, 256);
+
+	r600_screen_clear_buffer(rscreen, &res->b.b, 4, 4, 0xCC);
+	r600_screen_clear_buffer(rscreen, &res->b.b, 8, 4, 0xDD);
+	r600_screen_clear_buffer(rscreen, &res->b.b, 12, 4, 0xEE);
+	r600_screen_clear_buffer(rscreen, &res->b.b, 20, 4, 0xFF);
+	r600_screen_clear_buffer(rscreen, &res->b.b, 32, 20, 0x87);
+
+	ws->buffer_wait(res->buf, RADEON_USAGE_WRITE);
+
+	int i;
+	for (i = 0; i < 256; i++) {
+		printf("%02X", map[i]);
+		if (i % 16 == 15)
+			printf("\n");
+	}
+#endif
+
 	return &rscreen->screen;
 }
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -252,6 +252,11 @@ struct r600_screen {
 	unsigned			cs_count;
 #endif
 	r600g_dma_blit_t		dma_blit;
+
+	/* Auxiliary context. Mainly used to initialize resources.
+	 * It must be locked prior to using and flushed before unlocking. */
+	struct pipe_context		*aux_context;
+	pipe_mutex			aux_context_lock;
 };

 struct r600_pipe_sampler_view {
@@ -630,6 +635,8 @@ void evergreen_update_db_shader_control(struct r600_context * rctx);
 /* r600_blit.c */
 void r600_copy_buffer(struct pipe_context *ctx, struct pipe_resource *dst, unsigned dstx,
 		      struct pipe_resource *src, const struct pipe_box *src_box);
+void r600_screen_clear_buffer(struct r600_screen *rscreen, struct pipe_resource *dst,
+			      unsigned offset, unsigned size, unsigned char value);
 void r600_init_blit_functions(struct r600_context *rctx);
 void r600_blit_decompress_depth(struct pipe_context *ctx,
 		struct r600_texture *texture,
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -5760,7 +5760,7 @@ static int tgsi_umad(struct r600_shader_ctx *ctx)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bytecode_alu alu;
-	int i, j, r;
+	int i, j, k, r;
 	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

 	/* src0 * src1 */
@@ -5768,21 +5768,40 @@ static int tgsi_umad(struct r600_shader_ctx *ctx)
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;

-		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                if (ctx->bc->chip_class == CAYMAN) {
+                        for (j = 0; j < 4; j++) {
+                                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                                alu.dst.chan = j;
+                                alu.dst.sel = ctx->temp_reg;
+                                alu.dst.write = (j == i);

-		alu.dst.chan = i;
-		alu.dst.sel = ctx->temp_reg;
-		alu.dst.write = 1;
+                                if (j == 3)
+                                        alu.last = 1;
+                                alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
+                                for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
+                                        r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
+                                }
+                                r = r600_bytecode_add_alu(ctx->bc, &alu);
+                                if (r)
+                                        return r;
+                        }
+                } else {
+                        memset(&alu, 0, sizeof(struct r600_bytecode_alu));

-		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
-		for (j = 0; j < 2; j++) {
-		        r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
-		}
+                        alu.dst.chan = i;
+                        alu.dst.sel = ctx->temp_reg;
+                        alu.dst.write = 1;

-		alu.last = 1;
-		r = r600_bytecode_add_alu(ctx->bc, &alu);
-		if (r)
-			return r;
+                        alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT);
+                        for (j = 0; j < 2; j++) {
+                                r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
+                        }
+
+                        alu.last = 1;
+                        r = r600_bytecode_add_alu(ctx->bc, &alu);
+                        if (r)
+                                return r;
+                }
 	}


--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -720,6 +720,7 @@ static int r600_shader_select(struct pipe_context *ctx,
 	struct r600_pipe_shader * shader = NULL;
 	int r;

+	memset(&key, 0, sizeof(key));
 	key = r600_shader_selector_key(ctx, sel);

 	/* Check if we don't need to change anything.
@@ -766,7 +767,7 @@ static int r600_shader_select(struct pipe_context *ctx,
 			key = r600_shader_selector_key(ctx, sel);
 		}

-		shader->key = key;
+		memcpy(&shader->key, &key, sizeof(key));
 		sel->num_shaders++;
 	}

--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -480,10 +480,7 @@ r600_texture_create_object(struct pipe_screen *screen,
 			 */
 			R600_ERR("r600: failed to create bo for htile buffers\n");
 		} else {
-			void *ptr;
-			ptr = rscreen->ws->buffer_map(rtex->htile->cs_buf, NULL, PIPE_TRANSFER_WRITE);
-			memset(ptr, 0x0, htile_size);
-			rscreen->ws->buffer_unmap(rtex->htile->cs_buf);
+			r600_screen_clear_buffer(rscreen, &rtex->htile->b.b, 0, htile_size, 0);
 		}
 	}

@@ -505,9 +502,8 @@ r600_texture_create_object(struct pipe_screen *screen,

 	if (rtex->cmask_size) {
 		/* Initialize the cmask to 0xCC (= compressed state). */
-		char *ptr = rscreen->ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
-		memset(ptr + rtex->cmask_offset, 0xCC, rtex->cmask_size);
-		rscreen->ws->buffer_unmap(resource->cs_buf);
+		r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
+					 rtex->cmask_offset, rtex->cmask_size, 0xCC);
 	}

 	if (debug_get_option_print_texdepth() && rtex->is_depth && rtex->non_disp_tiling) {
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1126,7 +1126,9 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	/* XXX: We need to revisit this.I think the correct way to do this is
 	 * to use length = 4 here and use the elem_bld for everything. */
 	type.floating = TRUE;
+	type.fixed = FALSE;
 	type.sign = TRUE;
+	type.norm = FALSE;
 	type.width = 32;
 	type.length = 1;

--- a/src/gallium/drivers/radeonsi/r600_blit.c
+++ b/src/gallium/drivers/radeonsi/r600_blit.c
@@ -417,12 +417,30 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 			r600_change_format(dst, dst_level, &orig_info[1],
 					   PIPE_FORMAT_R8_UNORM);
 			break;
+		case 2:
+			r600_change_format(src, src_level, &orig_info[0],
+					   PIPE_FORMAT_R8G8_UNORM);
+			r600_change_format(dst, dst_level, &orig_info[1],
+					   PIPE_FORMAT_R8G8_UNORM);
+			break;
 		case 4:
 			r600_change_format(src, src_level, &orig_info[0],
 					   PIPE_FORMAT_R8G8B8A8_UNORM);
 			r600_change_format(dst, dst_level, &orig_info[1],
 					   PIPE_FORMAT_R8G8B8A8_UNORM);
 			break;
+		case 8:
+			r600_change_format(src, src_level, &orig_info[0],
+					   PIPE_FORMAT_R16G16B16A16_UINT);
+			r600_change_format(dst, dst_level, &orig_info[1],
+					   PIPE_FORMAT_R16G16B16A16_UINT);
+			break;
+		case 16:
+			r600_change_format(src, src_level, &orig_info[0],
+					   PIPE_FORMAT_R32G32B32A32_UINT);
+			r600_change_format(dst, dst_level, &orig_info[1],
+					   PIPE_FORMAT_R32G32B32A32_UINT);
+			break;
 		default:
 			fprintf(stderr, "Unhandled format %s with blocksize %u\n",
 				util_format_short_name(src->format), blocksize);
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -281,6 +281,7 @@ static const char *r600_get_family_name(enum radeon_family family)
 	case CHIP_PITCAIRN: return "AMD PITCAIRN";
 	case CHIP_VERDE: return "AMD CAPE VERDE";
 	case CHIP_OLAND: return "AMD OLAND";
+	case CHIP_HAINAN: return "AMD HAINAN";
 	default: return "AMD unknown";
 	}
 }
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -126,7 +126,6 @@ struct r600_context {
 	unsigned			pa_sc_line_stipple;
 	unsigned			pa_su_sc_mode_cntl;
 	unsigned			pa_cl_clip_cntl;
-	unsigned			pa_cl_vs_out_cntl;
 	/* for saving when using blitter */
 	struct pipe_stencil_ref		stencil_ref;
 	struct si_pipe_shader_selector	*ps_shader;
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -35,6 +35,7 @@
 #include "gallivm/lp_bld_tgsi.h"
 #include "radeon_llvm.h"
 #include "radeon_llvm_emit.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -626,6 +627,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 		switch (d->Declaration.File) {
 		case TGSI_FILE_INPUT:
 			i = shader->ninput++;
+			assert(i < Elements(shader->input));
 			shader->input[i].name = d->Semantic.Name;
 			shader->input[i].sid = d->Semantic.Index;
 			shader->input[i].interpolate = d->Interp.Interpolate;
@@ -634,6 +636,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)

 		case TGSI_FILE_OUTPUT:
 			i = shader->noutput++;
+			assert(i < Elements(shader->output));
 			shader->output[i].name = d->Semantic.Name;
 			shader->output[i].sid = d->Semantic.Index;
 			shader->output[i].interpolate = d->Interp.Interpolate;
@@ -647,7 +650,9 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 			/* Select the correct target */
 			switch(d->Semantic.Name) {
 			case TGSI_SEMANTIC_PSIZE:
-				target = V_008DFC_SQ_EXP_POS;
+				shader->vs_out_misc_write = 1;
+				shader->vs_out_point_size = 1;
+				target = V_008DFC_SQ_EXP_POS + 1;
 				break;
 			case TGSI_SEMANTIC_POSITION:
 				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -66,14 +66,16 @@ struct si_pipe_shader_selector {

 struct si_shader {
 	unsigned		ninput;
-	struct si_shader_io	input[32];
+	struct si_shader_io	input[40];

 	unsigned		noutput;
-	struct si_shader_io	output[32];
+	struct si_shader_io	output[40];

 	unsigned		ninterp;
 	bool			uses_kill;
 	bool			fs_write_all;
+	bool			vs_out_misc_write;
+	bool			vs_out_point_size;
 	unsigned		nr_cbufs;
 };

--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -383,6 +383,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 	}

 	rs->two_side = state->light_twoside;
+	rs->clip_plane_enable = state->clip_plane_enable;

 	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
 				state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -411,9 +412,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
-	rs->pa_cl_vs_out_cntl =
-		S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-		S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);

 	clip_rule = state->scissor ? 0xAAAA : 0xFFFF;

@@ -484,7 +482,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 	rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
 	rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
 	rctx->pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
-	rctx->pa_cl_vs_out_cntl = rs->pa_cl_vs_out_cntl;

 	si_pm4_bind_state(rctx, rasterizer, rs);
 	si_update_fb_rs_state(rctx);
@@ -2738,6 +2735,9 @@ void si_init_config(struct r600_context *rctx)
 	case CHIP_OLAND:
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
 		break;
+	case CHIP_HAINAN:
+		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+		break;
 	default:
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
 		break;
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -49,6 +49,7 @@ struct si_state_rasterizer {
 	unsigned		pa_su_sc_mode_cntl;
 	unsigned		pa_cl_clip_cntl;
 	unsigned		pa_cl_vs_out_cntl;
+	unsigned		clip_plane_enable;
 	float			offset_units;
 	float			offset_scale;
 };
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -55,8 +55,13 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s
 	 * takes care of adding a dummy export.
 	 */
 	for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
-		if (shader->shader.output[i].name != TGSI_SEMANTIC_POSITION)
+		switch (shader->shader.output[i].name) {
+		case TGSI_SEMANTIC_POSITION:
+		case TGSI_SEMANTIC_PSIZE:
+			break;
+		default:
 			nparams++;
+		}
 	}
 	if (nparams < 1)
 		nparams = 1;
@@ -66,7 +71,9 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s

 	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
 		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-		       S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
+		       S_02870C_POS1_EXPORT_FORMAT(shader->shader.vs_out_misc_write ?
+						   V_02870C_SPI_SHADER_4COMP :
+						   V_02870C_SPI_SHADER_NONE) |
 		       S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
 		       S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE));

@@ -258,6 +265,7 @@ static bool si_update_draw_info_state(struct r600_context *rctx,
 			       const struct pipe_draw_info *info)
 {
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	struct si_shader *vs = &rctx->vs_shader->current->shader;
 	unsigned prim = si_conv_pipe_prim(info->mode);
 	unsigned ls_mask = 0;

@@ -296,13 +304,14 @@ static bool si_update_draw_info_state(struct r600_context *rctx,
 		si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, rctx->pa_su_sc_mode_cntl);
        }
 	si_pm4_set_reg(pm4, R_02881C_PA_CL_VS_OUT_CNTL,
-		       prim == PIPE_PRIM_POINTS ? rctx->pa_cl_vs_out_cntl : 0
+		       S_02881C_USE_VTX_POINT_SIZE(vs->vs_out_point_size) |
+		       S_02881C_VS_OUT_MISC_VEC_ENA(vs->vs_out_misc_write)
 		       /*| (rctx->rasterizer->clip_plane_enable &
 		       rctx->vs_shader->shader.clip_dist_write)*/);
 	si_pm4_set_reg(pm4, R_028810_PA_CL_CLIP_CNTL, rctx->pa_cl_clip_cntl
-			/*| (rctx->vs_shader->shader.clip_dist_write ||
+			| (/*rctx->vs_shader->shader.clip_dist_write ||
 			rctx->vs_shader->shader.vs_prohibit_ucps ?
-			0 : rctx->rasterizer->clip_plane_enable & 0x3F)*/);
+			0 :*/ rctx->queued.named.rasterizer->clip_plane_enable & 0x3F));

 	si_pm4_set_state(rctx, draw_info, pm4);
 	return true;
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -240,6 +240,7 @@ struct st_visual
 struct st_config_options
 {
 	boolean force_glsl_extensions_warn;
+	boolean disable_glsl_line_continuations;
 };

 /**
--- a/src/gallium/state_trackers/dri/common/dri_context.c
+++ b/src/gallium/state_trackers/dri/common/dri_context.c
@@ -54,6 +54,8 @@ static void dri_fill_st_options(struct st_config_options *options,
 {
   options->force_glsl_extensions_warn =
      driQueryOptionb(optionCache, "force_glsl_extensions_warn");
+   options->disable_glsl_line_continuations =
+      driQueryOptionb(optionCache, "disable_glsl_line_continuations");
 }

 GLboolean
--- a/src/gallium/state_trackers/dri/common/dri_screen.c
+++ b/src/gallium/state_trackers/dri/common/dri_screen.c
@@ -65,13 +65,14 @@ PUBLIC const char __driConfigOptions[] =

      DRI_CONF_SECTION_DEBUG
         DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN(false)
+         DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS(false)
      DRI_CONF_SECTION_END

   DRI_CONF_END;

 #define false 0

-static const uint __driNConfigOptions = 10;
+static const uint __driNConfigOptions = 11;

 static const __DRIconfig **
 dri_fill_in_modes(struct dri_screen *screen)
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -1352,25 +1352,25 @@ glXQueryExtension( Display *dpy, int *errorBase, int *eventBase )
 PUBLIC void
 glXDestroyContext( Display *dpy, GLXContext ctx )
 {
-   GLXContext glxCtx = ctx;
-   (void) dpy;
-   MakeCurrent_PrevContext = 0;
-   MakeCurrent_PrevDrawable = 0;
-   MakeCurrent_PrevReadable = 0;
-   MakeCurrent_PrevDrawBuffer = 0;
-   MakeCurrent_PrevReadBuffer = 0;
-   XMesaDestroyContext( glxCtx->xmesaContext );
-   XMesaGarbageCollect();
-   free(glxCtx);
+   if (ctx) {
+      GLXContext glxCtx = ctx;
+      (void) dpy;
+      MakeCurrent_PrevContext = 0;
+      MakeCurrent_PrevDrawable = 0;
+      MakeCurrent_PrevReadable = 0;
+      MakeCurrent_PrevDrawBuffer = 0;
+      MakeCurrent_PrevReadBuffer = 0;
+      XMesaDestroyContext( glxCtx->xmesaContext );
+      XMesaGarbageCollect();
+      free(glxCtx);
+   }
 }


 PUBLIC Bool
 glXIsDirect( Display *dpy, GLXContext ctx )
 {
-   GLXContext glxCtx = ctx;
-   (void) ctx;
-   return glxCtx->isDirect;
+   return ctx ? ctx->isDirect : False;
 }


--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -1238,9 +1238,13 @@ void XMesaSwapBuffers( XMesaBuffer b )
 */
 void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
 {
+   XMesaContext xmctx = XMesaGetCurrentContext();
+
+   xmctx->st->flush( xmctx->st, ST_FLUSH_FRONT, NULL);
+
   xmesa_copy_st_framebuffer(b->stfb,
         ST_ATTACHMENT_BACK_LEFT, ST_ATTACHMENT_FRONT_LEFT,
-         x, y, width, height);
+         x, b->height - y - height, width, height);
 }


--- a/src/gallium/targets/dri-i915/Makefile.am
+++ b/src/gallium/targets/dri-i915/Makefile.am
@@ -62,17 +62,11 @@ i915_dri_la_LIBADD = \
 	$(GALLIUM_DRI_LIB_DEPS) \
 	$(INTEL_LIBS)

-if HAVE_MESA_LLVM
-i915_dri_la_LINK = $(CXXLINK) $(i915_dri_la_LDFLAGS)
-# Mention a dummy pure C++ file to trigger generation of the $(LINK) variable
-nodist_EXTRA_i915_dri_la_SOURCES = dummy-cpp.cpp
+nodist_EXTRA_i915_dri_la_SOURCES = dummy.cpp

+if HAVE_MESA_LLVM
 AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
 i915_dri_la_LIBADD += $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la $(LLVM_LIBS)
-else
-i915_dri_la_LINK = $(LINK) $(i915_dri_la_LDFLAGS)
-# Mention a dummy pure C file to trigger generation of the $(LINK) variable
-nodist_EXTRA_i915_dri_la_SOURCES = dummy-c.c
 endif

 # Provide compatibility with scripts for the old Mesa build system for
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -313,6 +313,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
    case CHIP_PITCAIRN:
    case CHIP_VERDE:
    case CHIP_OLAND:
+    case CHIP_HAINAN:
        ws->info.chip_class = TAHITI;
        break;
    }
@@ -381,6 +382,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
                                      &ws->info.r600_ib_vm_max_size))
                ws->info.r600_virtual_address = FALSE;
        }
+	if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", TRUE))
+		ws->info.r600_virtual_address = FALSE;
    }

    /* Get max pipes, this is only needed for compute shaders.  All evergreen+
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -124,6 +124,7 @@ enum radeon_family {
    CHIP_PITCAIRN,
    CHIP_VERDE,
    CHIP_OLAND,
+    CHIP_HAINAN,
    CHIP_LAST,
 };

--- a/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
+++ b/src/gallium/winsys/sw/xlib/xlib_sw_winsys.c
@@ -165,6 +165,7 @@ alloc_shm_ximage(struct xlib_displaytarget *xlib_dt,
                                      &xlib_dt->shminfo,
                                      width, height);
   if (xlib_dt->tempImage == NULL) {
+      shmctl(xlib_dt->shminfo.shmid, IPC_RMID, 0);
      xlib_dt->shm = False;
      return;
   }
@@ -176,6 +177,11 @@ alloc_shm_ximage(struct xlib_displaytarget *xlib_dt,
   XShmAttach(xlib_dt->display, &xlib_dt->shminfo);
   XSync(xlib_dt->display, False);

+   /* Mark the segment to be destroyed, so that it is automatically destroyed
+    * when this process dies.  Needs to be after XShmAttach() for *BSD.
+    */
+   shmctl(xlib_dt->shminfo.shmid, IPC_RMID, 0);
+
   if (XErrorFlag) {
      /* we are on a remote display, this error is normal, don't print it */
      XFlush(xlib_dt->display);
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2821,30 +2821,46 @@ ast_declarator_list::hir(exec_list *instructions,
 	 }
      }

-      /* Integer vertex outputs must be qualified with 'flat'.
+      /* Integer fragment inputs must be qualified with 'flat'.  In GLSL ES,
+       * so must integer vertex outputs.
       *
-       * From section 4.3.6 of the GLSL 1.30 spec:
-       *    "If a vertex output is a signed or unsigned integer or integer
-       *    vector, then it must be qualified with the interpolation qualifier
+       * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec:
+       *    "Fragment shader inputs that are signed or unsigned integers or
+       *    integer vectors must be qualified with the interpolation qualifier
       *    flat."
       *
-       * From section 4.3.4 of the GLSL 3.00 ES spec:
+       * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec:
       *    "Fragment shader inputs that are, or contain, signed or unsigned
       *    integers or integer vectors must be qualified with the
       *    interpolation qualifier flat."
       *
-       * Since vertex outputs and fragment inputs must have matching
-       * qualifiers, these two requirements are equivalent.
+       * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec:
+       *    "Vertex shader outputs that are, or contain, signed or unsigned
+       *    integers or integer vectors must be qualified with the
+       *    interpolation qualifier flat."
+       *
+       * Note that prior to GLSL 1.50, this requirement applied to vertex
+       * outputs rather than fragment inputs.  That creates problems in the
+       * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all
+       * desktop GL shaders.  For GLSL ES shaders, we follow the spec and
+       * apply the restriction to both vertex outputs and fragment inputs.
+       *
+       * Note also that the desktop GLSL specs are missing the text "or
+       * contain"; this is presumably an oversight, since there is no
+       * reasonable way to interpolate a fragment shader input that contains
+       * an integer.
       */
-      if (state->is_version(130, 300)
-          && state->target == vertex_shader
-          && state->current_function == NULL
-          && var->type->contains_integer()
-          && var->mode == ir_var_shader_out
-          && var->interpolation != INTERP_QUALIFIER_FLAT) {
-
-         _mesa_glsl_error(&loc, state, "If a vertex output is (or contains) "
-                          "an integer, then it must be qualified with 'flat'");
+      if (state->is_version(130, 300) &&
+          var->type->contains_integer() &&
+          var->interpolation != INTERP_QUALIFIER_FLAT &&
+          ((state->target == fragment_shader && var->mode == ir_var_shader_in)
+           || (state->target == vertex_shader && var->mode == ir_var_shader_out
+               && state->es_shader))) {
+         const char *var_type = (state->target == vertex_shader) ?
+            "vertex output" : "fragment input";
+         _mesa_glsl_error(&loc, state, "If a %s is (or contains) "
+                          "an integer, then it must be qualified with 'flat'",
+                          var_type);
      }


@@ -3349,10 +3365,17 @@ ast_function::hir(exec_list *instructions,
 			     "match prototype", name);
 	 }

-	 if (is_definition && sig->is_defined) {
-	    YYLTYPE loc = this->get_location();
-
-	    _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+         if (sig->is_defined) {
+            if (is_definition) {
+               YYLTYPE loc = this->get_location();
+               _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+            } else {
+               /* We just encountered a prototype that exactly matches a
+                * function that's already been defined.  This is redundant,
+                * and we should ignore it.
+                */
+               return NULL;
+            }
 	 }
      }
   } else {
--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -535,7 +535,7 @@ generate_common_ES_uniforms(exec_list *instructions,
   add_builtin_constant(instructions, symtab, "gl_MaxTextureImageUnits",
 			state->Const.MaxTextureImageUnits);
   add_builtin_constant(instructions, symtab, "gl_MaxFragmentUniformVectors",
-			state->Const.MaxFragmentUniformComponents);
+			state->Const.MaxFragmentUniformComponents / 4);

   add_uniform(instructions, symtab, "gl_DepthRange",
 	       state->symbols->get_type("gl_DepthRangeParameters"));
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -265,10 +265,16 @@ version_statement:
 	| VERSION_TOK INTCONSTANT EOL
 	{
           state->process_version_directive(&@2, $2, NULL);
+	   if (state->error) {
+	      YYERROR;
+	   }
 	}
        | VERSION_TOK INTCONSTANT any_identifier EOL
        {
           state->process_version_directive(&@2, $2, $3);
+	   if (state->error) {
+	      YYERROR;
+	   }
        }
 	;

--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -1385,7 +1385,7 @@ ir_dereference_array::constant_referenced(struct hash_table *variable_context,
      return;
   }

-   const glsl_type *vt = substore->type;
+   const glsl_type *vt = array->type;
   if (vt->is_array()) {
      store = substore->get_array_element(index);
      offset = 0;
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -541,7 +541,7 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
 class varying_matches
 {
 public:
-   varying_matches(bool disable_varying_packing);
+   varying_matches(bool disable_varying_packing, bool consumer_is_fs);
   ~varying_matches();
   void record(ir_variable *producer_var, ir_variable *consumer_var);
   unsigned assign_locations();
@@ -621,11 +621,15 @@ private:
    * it was allocated.
    */
   unsigned matches_capacity;
+
+   const bool consumer_is_fs;
 };


-varying_matches::varying_matches(bool disable_varying_packing)
-   : disable_varying_packing(disable_varying_packing)
+varying_matches::varying_matches(bool disable_varying_packing,
+                                 bool consumer_is_fs)
+   : disable_varying_packing(disable_varying_packing),
+     consumer_is_fs(consumer_is_fs)
 {
   /* Note: this initial capacity is rather arbitrarily chosen to be large
    * enough for many cases without wasting an unreasonable amount of space.
@@ -656,6 +660,10 @@ varying_matches::~varying_matches()
 * If \c producer_var has already been paired up with a consumer_var, or
 * producer_var is part of fixed pipeline functionality (and hence already has
 * a location assigned), this function has no effect.
+ *
+ * Note: as a side effect this function may change the interpolation type of
+ * \c producer_var, but only when the change couldn't possibly affect
+ * rendering.
 */
 void
 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
@@ -668,6 +676,25 @@ varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
      return;
   }

+   if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
+       !consumer_is_fs) {
+      /* Since this varying is not being consumed by the fragment shader, its
+       * interpolation type cannot possibly affect rendering.  Also,
+       * this variable is non-flat and is (or contains) an integer.
+       *
+       * lower_packed_varyings requires all integer varyings to be flat,
+       * regardless of where they appear.  We can trivially satisfy that
+       * requirement by changing the interpolation type to flat here.
+       */
+      producer_var->centroid = false;
+      producer_var->interpolation = INTERP_QUALIFIER_FLAT;
+
+      if (consumer_var) {
+         consumer_var->centroid = false;
+         consumer_var->interpolation = INTERP_QUALIFIER_FLAT;
+      }
+   }
+
   if (this->num_matches == this->matches_capacity) {
      this->matches_capacity *= 2;
      this->matches = (match *)
@@ -959,7 +986,8 @@ assign_varying_locations(struct gl_context *ctx,
   /* FINISHME: Set dynamically when geometry shader support is added. */
   const unsigned producer_base = VERT_RESULT_VAR0;
   const unsigned consumer_base = FRAG_ATTRIB_VAR0;
-   varying_matches matches(ctx->Const.DisableVaryingPacking);
+   varying_matches matches(ctx->Const.DisableVaryingPacking,
+                           consumer && consumer->Type == GL_FRAGMENT_SHADER);
   hash_table *tfeedback_candidates
      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);

--- a/src/glsl/lower_jumps.cpp
+++ b/src/glsl/lower_jumps.cpp
@@ -1002,10 +1002,12 @@ do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_retu
   v.lower_sub_return = lower_sub_return;
   v.lower_main_return = lower_main_return;

+   bool progress_ever = false;
   do {
      v.progress = false;
      visit_exec_list(instructions, &v);
+      progress_ever = v.progress || progress_ever;
   } while (v.progress);

-   return v.progress;
+   return progress_ever;
 }
--- a/src/glsl/lower_packed_varyings.cpp
+++ b/src/glsl/lower_packed_varyings.cpp
@@ -178,6 +178,14 @@ lower_packed_varyings_visitor::run(exec_list *instructions)
          !this->needs_lowering(var))
         continue;

+      /* This lowering pass is only capable of packing floats and ints
+       * together when their interpolation mode is "flat".  Therefore, to be
+       * safe, caller should ensure that integral varyings always use flat
+       * interpolation, even when this is not required by GLSL.
+       */
+      assert(var->interpolation == INTERP_QUALIFIER_FLAT ||
+             !var->type->contains_integer());
+
      /* Change the old varying into an ordinary global. */
      var->mode = ir_var_auto;

--- a/src/glsl/ralloc.c
+++ b/src/glsl/ralloc.c
@@ -108,6 +108,8 @@ ralloc_size(const void *ctx, size_t size)
 {
   void *block = calloc(1, size + sizeof(ralloc_header));

+   if (unlikely(block == NULL))
+      return NULL;
   ralloc_header *info = (ralloc_header *) block;
   ralloc_header *parent = ctx != NULL ? get_header(ctx) : NULL;

--- a/src/glx/XF86dri.c
+++ b/src/glx/XF86dri.c
@@ -43,6 +43,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include <X11/extensions/Xext.h>
 #include <X11/extensions/extutil.h>
 #include "xf86dristr.h"
+#include <limits.h>

 static XExtensionInfo _xf86dri_info_data;
 static XExtensionInfo *xf86dri_info = &_xf86dri_info_data;
@@ -201,7 +202,11 @@ XF86DRIOpenConnection(Display * dpy, int screen, drm_handle_t * hSAREA,
   }

   if (rep.length) {
-      if (!(*busIdString = calloc(rep.busIdStringLength + 1, 1))) {
+      if (rep.busIdStringLength < INT_MAX)
+         *busIdString = calloc(rep.busIdStringLength + 1, 1);
+      else
+         *busIdString = NULL;
+      if (*busIdString == NULL) {
         _XEatData(dpy, ((rep.busIdStringLength + 3) & ~3));
         UnlockDisplay(dpy);
         SyncHandle();
@@ -300,9 +305,11 @@ XF86DRIGetClientDriverName(Display * dpy, int screen,
   *ddxDriverPatchVersion = rep.ddxDriverPatchVersion;

   if (rep.length) {
-      if (!
-          (*clientDriverName =
-           calloc(rep.clientDriverNameLength + 1, 1))) {
+      if (rep.clientDriverNameLength < INT_MAX)
+         *clientDriverName = calloc(rep.clientDriverNameLength + 1, 1);
+      else
+         *clientDriverName = NULL;
+      if (*clientDriverName == NULL) {
         _XEatData(dpy, ((rep.clientDriverNameLength + 3) & ~3));
         UnlockDisplay(dpy);
         SyncHandle();
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -912,8 +912,10 @@ init_fbconfig_for_chooser(struct glx_config * config,
 /* Test that all bits from a are contained in b */
 #define MATCH_MASK(param)			\
  do {						\
-    if ((a->param & ~b->param) != 0)		\
+    if ( ((int) a-> param != (int) GLX_DONT_CARE)	\
+         && ((a->param & ~b->param) != 0) ) {   \
      return False;				\
+    }                                           \
  } while (0);

 /**
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -675,22 +675,25 @@

 <!-- 71. GL_OES_vertex_array_object -->
 <category name="GL_OES_vertex_array_object" number="71">
-    <function name="BindVertexArrayOES" alias="BindVertexArray" es2="2.0">
+    <function name="BindVertexArrayOES" alias="BindVertexArray"
+              static_dispatch="false" es2="2.0">
        <param name="array" type="GLuint"/>
    </function>

    <function name="DeleteVertexArraysOES" alias="DeleteVertexArrays"
-              es2="2.0">
+              static_dispatch="false" es2="2.0">
        <param name="n" type="GLsizei"/>
        <param name="arrays" type="const GLuint *" count="n"/>
    </function>

-    <function name="GenVertexArraysOES" alias="GenVertexArrays" es2="2.0">
+    <function name="GenVertexArraysOES" alias="GenVertexArrays"
+              static_dispatch="false" es2="2.0">
        <param name="n" type="GLsizei"/>
        <param name="arrays" type="GLuint *" output="true" count="n"/>
    </function>

-    <function name="IsVertexArrayOES" alias="IsVertexArray" es2="2.0">
+    <function name="IsVertexArrayOES" alias="IsVertexArray"
+              static_dispatch="false" es2="2.0">
        <param name="array" type="GLuint"/>
        <return type="GLboolean"/>
    </function>
@@ -765,7 +768,8 @@
        <size name="Get" mode="get"/>
    </enum>

-    <function name="DrawBuffersNV" alias="DrawBuffers" es2="2.0">
+    <function name="DrawBuffersNV" alias="DrawBuffers"
+              static_dispatch="false" es2="2.0">
        <param name="n" type="GLsizei" counter="true"/>
        <param name="bufs" type="const GLenum *" count="n"/>
    </function>
@@ -773,7 +777,8 @@

 <!-- 93. GL_NV_read_buffer -->
 <category name="GL_NV_read_buffer">
-    <function name="ReadBufferNV" alias="ReadBuffer" es2="2.0">
+    <function name="ReadBufferNV" alias="ReadBuffer"
+              static_dispatch="false" es2="2.0">
        <param name="mode" type="GLenum"/>
    </function>
 </category>
@@ -801,8 +806,8 @@
    <enum name="MAP_FLUSH_EXPLICIT_BIT_EXT"               value="0x0010"/>
    <enum name="MAP_UNSYNCHRONIZED_BIT_EXT"               value="0x0020"/>

-    <function name="MapBufferRangeEXT" alias="MapBufferRange" es1="1.0"
-              es2="2.0">
+    <function name="MapBufferRangeEXT" alias="MapBufferRange"
+              static_dispatch="false" es1="1.0" es2="2.0">
        <param name="target" type="GLenum"/>
        <param name="offset" type="GLintptr"/>
        <param name="size" type="GLsizeiptr"/>
@@ -811,7 +816,7 @@
    </function>

    <function name="FlushMappedBufferRangeEXT" alias="FlushMappedBufferRange"
-              es1="1.0" es2="2.0">
+              static_dispatch="false" es1="1.0" es2="2.0">
        <param name="target" type="GLenum"/>
        <param name="offset" type="GLintptr"/>
        <param name="length" type="GLsizeiptr"/>
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -3364,6 +3364,8 @@ setup_glsl_generate_mipmap(struct gl_context *ctx,
                                   sizeof(struct vertex), OFFSET(x));
      _mesa_VertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE,
                                   sizeof(struct vertex), OFFSET(tex));
+      _mesa_EnableVertexAttribArray(0);
+      _mesa_EnableVertexAttribArray(1);
   }

   /* Generate a fragment shader program appropriate for the texture target */
@@ -3435,8 +3437,6 @@ setup_glsl_generate_mipmap(struct gl_context *ctx,
   _mesa_DeleteObjectARB(vs);
   _mesa_BindAttribLocation(mipmap->ShaderProg, 0, "position");
   _mesa_BindAttribLocation(mipmap->ShaderProg, 1, "texcoords");
-   _mesa_EnableVertexAttribArray(0);
-   _mesa_EnableVertexAttribArray(1);
   link_program_with_debug(ctx, mipmap->ShaderProg);
   sampler->shader_prog = mipmap->ShaderProg;
   ralloc_free(mem_ctx);
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
@@ -66,7 +66,7 @@ options.h: t_options.h $(MOS)

 # Update .mo files from the corresponding .po files.
 %/LC_MESSAGES/options.mo: %.po
-	@mo="$@" \
+	@mo="$@"; \
 	lang=$${mo%%/*}; \
 	echo "Updating ($$lang) $@ from $?."; \
 	mkdir -p $$lang/LC_MESSAGES; \
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -33,6 +33,7 @@
 #include "main/fbobject.h"
 #include "main/dd.h"
 #include "main/state.h"
+#include "main/stencil.h"
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"

@@ -63,14 +64,14 @@ i915_update_stencil(struct gl_context * ctx)
    */
   /* _NEW_POLYGON | _NEW_STENCIL */
   if (ctx->Polygon.FrontFace == GL_CW) {
-      front_ref = ctx->Stencil.Ref[0];
+      front_ref = _mesa_get_stencil_ref(ctx, 0);
      front_mask = ctx->Stencil.ValueMask[0];
      front_writemask = ctx->Stencil.WriteMask[0];
      front_func = ctx->Stencil.Function[0];
      front_fail = ctx->Stencil.FailFunc[0];
      front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
      front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
-      back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+      back_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
      back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
      back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
      back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
@@ -78,14 +79,14 @@ i915_update_stencil(struct gl_context * ctx)
      back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
      back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
   } else {
-      front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+      front_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
      front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
      front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
      front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
      front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
      front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
      front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
-      back_ref = ctx->Stencil.Ref[0];
+      back_ref = _mesa_get_stencil_ref(ctx, 0);
      back_mask = ctx->Stencil.ValueMask[0];
      back_writemask = ctx->Stencil.WriteMask[0];
      back_func = ctx->Stencil.Function[0];
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -35,6 +35,7 @@
 #include "brw_defines.h"
 #include "brw_util.h"
 #include "main/macros.h"
+#include "main/stencil.h"
 #include "intel_batchbuffer.h"

 static void
@@ -116,7 +117,7 @@ static void upload_cc_unit(struct brw_context *brw)
 	 intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
      cc->cc0.stencil_pass_depth_pass_op =
 	 intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
-      cc->cc1.stencil_ref = ctx->Stencil.Ref[0];
+      cc->cc1.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
      cc->cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
      cc->cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];

@@ -130,7 +131,7 @@ static void upload_cc_unit(struct brw_context *brw)
 	    intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
 	 cc->cc0.bf_stencil_pass_depth_pass_op =
 	    intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
-	 cc->cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
+	 cc->cc1.bf_stencil_ref = _mesa_get_stencil_ref(ctx, back);
 	 cc->cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
 	 cc->cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
      }
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -293,6 +293,12 @@ brwCreateContext(int api,
 	 brw->urb.size = 256;
 	 brw->urb.max_vs_entries = 1664;
 	 brw->urb.max_gs_entries = 640;
+      } else if (intel->gt == 3) {
+	 brw->max_wm_threads = 408;
+	 brw->max_vs_threads = 280;
+	 brw->urb.size = 512;
+	 brw->urb.max_vs_entries = 1664;
+	 brw->urb.max_gs_entries = 640;
      }
   } else if (intel->gen == 7) {
      if (intel->gt == 1) {
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -844,15 +844,6 @@ struct brw_context
      GLuint nr_sf_entries;
      GLuint nr_cs_entries;

-      /* gen6:
-       * The length of each URB entry owned by the VS (or GS), as
-       * a number of 1024-bit (128-byte) rows.  Should be >= 1.
-       *
-       * gen7: Same meaning, but in 512-bit (64-byte) rows.
-       */
-      GLuint vs_size;
-      GLuint gs_size;
-
      GLuint vs_start;
      GLuint gs_start;
      GLuint clip_start;
@@ -1310,7 +1301,8 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
 }

 bool brw_do_cubemap_normalize(struct exec_list *instructions);
-bool brw_lower_texture_gradients(struct exec_list *instructions);
+bool brw_lower_texture_gradients(struct intel_context *intel,
+                                 struct exec_list *instructions);

 struct opcode_desc {
    char    *name;
@@ -1320,6 +1312,29 @@ struct opcode_desc {

 extern const struct opcode_desc opcode_descs[128];

+void
+brw_emit_depthbuffer(struct brw_context *brw);
+
+void
+brw_emit_depth_stencil_hiz(struct brw_context *brw,
+                           struct intel_mipmap_tree *depth_mt,
+                           uint32_t depth_offset, uint32_t depthbuffer_format,
+                           uint32_t depth_surface_type,
+                           struct intel_mipmap_tree *stencil_mt,
+                           struct intel_mipmap_tree *hiz_mt,
+                           bool separate_stencil, uint32_t width,
+                           uint32_t height, uint32_t tile_x, uint32_t tile_y);
+
+void
+gen7_emit_depth_stencil_hiz(struct brw_context *brw,
+                            struct intel_mipmap_tree *depth_mt,
+                            uint32_t depth_offset, uint32_t depthbuffer_format,
+                            uint32_t depth_surface_type,
+                            struct intel_mipmap_tree *stencil_mt,
+                            struct intel_mipmap_tree *hiz_mt,
+                            bool separate_stencil, uint32_t width,
+                            uint32_t height, uint32_t tile_x, uint32_t tile_y);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -263,6 +263,14 @@ get_surface_type(struct intel_context *intel,
         else {
            return ubyte_types_norm[size];
         }
+      case GL_FIXED:
+         if (intel->gen >= 8 || intel->is_haswell)
+            return fixed_point_types[size];
+
+         /* This produces GL_FIXED inputs as values between INT32_MIN and
+          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
+          */
+         return int_types_scale[size];
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -60,6 +60,9 @@ fs_inst::init()
   this->src[0] = reg_undef;
   this->src[1] = reg_undef;
   this->src[2] = reg_undef;
+
+   /* This will be the case for almost all instructions. */
+   this->regs_written = 1;
 }

 fs_inst::fs_inst()
@@ -221,14 +224,19 @@ fs_visitor::CMP(fs_reg dst, fs_reg src0, fs_reg src1, uint32_t condition)

 exec_list
 fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
-                                       fs_reg offset)
+                                       fs_reg varying_offset,
+                                       uint32_t const_offset)
 {
   exec_list instructions;
   fs_inst *inst;

+   fs_reg offset = fs_reg(this, glsl_type::uint_type);
+   instructions.push_tail(ADD(offset, varying_offset, fs_reg(const_offset)));
+
   if (intel->gen >= 7) {
      inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
                                  dst, surf_index, offset);
+      inst->regs_written = 1;
      instructions.push_tail(inst);
   } else {
      int base_mrf = 13;
@@ -245,7 +253,6 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
      } else {
         instructions.push_tail(MUL(mrf, offset, fs_reg(4)));
      }
-      inst = MOV(mrf, offset);
      inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
                                  dst, surf_index);
      inst->header_present = header_present;
@@ -299,26 +306,13 @@ fs_inst::equals(fs_inst *inst)
           offset == inst->offset);
 }

-int
-fs_inst::regs_written()
-{
-   if (is_tex())
-      return 4;
-
-   /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
-    * but we don't currently use them...nor do we have an opcode for them.
-    */
-
-   return 1;
-}
-
 bool
 fs_inst::overwrites_reg(const fs_reg &reg)
 {
   return (reg.file == dst.file &&
           reg.reg == dst.reg &&
           reg.reg_offset >= dst.reg_offset  &&
-           reg.reg_offset < dst.reg_offset + regs_written());
+           reg.reg_offset < dst.reg_offset + regs_written);
 }

 bool
@@ -826,6 +820,7 @@ fs_visitor::import_uniforms(fs_visitor *v)
 			   import_uniforms_callback,
 			   variable_ht);
   this->params_remap = v->params_remap;
+   this->nr_params_remap = v->nr_params_remap;
 }

 /* Our support for uniforms is piggy-backed on the struct
@@ -959,16 +954,24 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
                         bool is_centroid)
 {
   brw_wm_barycentric_interp_mode barycoord_mode;
-   if (is_centroid) {
-      if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
-         barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
-      else
-         barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+   if (intel->gen >= 6) {
+      if (is_centroid) {
+         if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+            barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+         else
+            barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+      } else {
+         if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+            barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+         else
+            barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+      }
   } else {
-      if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
-         barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
-      else
-         barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+      /* On Ironlake and below, there is only one interpolation mode.
+       * Centroid interpolation doesn't mean anything on this hardware --
+       * there is no multisampling.
+       */
+      barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
   }
   return emit(FS_OPCODE_LINTERP, attr,
               this->delta_x[barycoord_mode],
@@ -1357,7 +1360,7 @@ fs_visitor::split_virtual_grfs()
      /* If there's a SEND message that requires contiguous destination
       * registers, no splitting is allowed.
       */
-      if (inst->regs_written() > 1) {
+      if (inst->regs_written > 1) {
 	 split_grf[inst->dst.reg] = false;
      }
   }
@@ -1490,6 +1493,7 @@ fs_visitor::remove_dead_constants()
 {
   if (dispatch_width == 8) {
      this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+      this->nr_params_remap = c->prog_data.nr_params;

      for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
 	 this->params_remap[i] = -1;
@@ -1504,7 +1508,14 @@ fs_visitor::remove_dead_constants()
 	    if (inst->src[i].file != UNIFORM)
 	       continue;

-	    assert(constant_nr < (int)c->prog_data.nr_params);
+	    /* Section 5.11 of the OpenGL 4.3 spec says:
+	     *
+	     *     "Out-of-bounds reads return undefined values, which include
+	     *     values from other variables of the active program or zero."
+	     */
+	    if (constant_nr < 0 || constant_nr >= (int)c->prog_data.nr_params) {
+	       constant_nr = 0;
+	    }

 	    /* For now, set this to non-negative.  We'll give it the
 	     * actual new number in a moment, in order to keep the
@@ -1552,6 +1563,10 @@ fs_visitor::remove_dead_constants()
 	 if (inst->src[i].file != UNIFORM)
 	    continue;

+	 /* As above, alias out-of-bounds constant indices to 0. */
+	 if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) {
+	    constant_nr = 0;
+	 }
 	 assert(this->params_remap[constant_nr] != -1);
 	 inst->src[i].reg = this->params_remap[constant_nr];
 	 inst->src[i].reg_offset = 0;
@@ -1617,15 +1632,13 @@ fs_visitor::move_uniform_array_access_to_pull_constants()
         base_ir = inst->ir;
         current_annotation = inst->annotation;

-         fs_reg offset = fs_reg(this, glsl_type::int_type);
-         inst->insert_before(ADD(offset, *inst->src[i].reladdr,
-                                 fs_reg(pull_constant_loc[uniform] +
-                                        inst->src[i].reg_offset)));
-
         fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_FRAG_CONST_BUFFER);
         fs_reg temp = fs_reg(this, glsl_type::float_type);
         exec_list list = VARYING_PULL_CONSTANT_LOAD(temp,
-                                                     surf_index, offset);
+                                                     surf_index,
+                                                     *inst->src[i].reladdr,
+                                                     pull_constant_loc[uniform] +
+                                                     inst->src[i].reg_offset);
         inst->insert_before(&list);

         inst->src[i].file = temp.file;
@@ -2082,6 +2095,12 @@ fs_visitor::compute_to_mrf()
 	       break;
 	    }

+            /* An instruction that writes more than one register would need
+             * us to coalesce more than one MOV at a time; give up instead.
+             */
+            if (scan_inst->regs_written > 1)
+               break;
+
 	    /* SEND instructions can't have MRF as a destination. */
 	    if (scan_inst->mlen)
 	       break;
@@ -2296,7 +2315,7 @@ void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 {
   int reg_size = dispatch_width / 8;
-   int write_len = inst->regs_written() * reg_size;
+   int write_len = inst->regs_written * reg_size;
   int first_write_grf = inst->dst.reg;
   bool needs_dep[BRW_MAX_MRF];
   assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2336,7 +2355,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
       * dependency has more latency than a MOV.
       */
      if (scan_inst->dst.file == GRF) {
-         for (int i = 0; i < scan_inst->regs_written(); i++) {
+         for (int i = 0; i < scan_inst->regs_written; i++) {
            int reg = scan_inst->dst.reg + i * reg_size;

            if (reg >= first_write_grf &&
@@ -2375,7 +2394,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
 {
-   int write_len = inst->regs_written() * dispatch_width / 8;
+   int write_len = inst->regs_written * dispatch_width / 8;
   int first_write_grf = inst->dst.reg;
   bool needs_dep[BRW_MAX_MRF];
   assert(write_len < (int)sizeof(needs_dep) - 1);
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -174,7 +174,6 @@ public:
           fs_reg src0, fs_reg src1,fs_reg src2);

   bool equals(fs_inst *inst);
-   int regs_written();
   bool overwrites_reg(const fs_reg &reg);
   bool is_tex();
   bool is_math();
@@ -192,6 +191,7 @@ public:
   uint8_t flag_subreg;

   int mlen; /**< SEND message length */
+   int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
   uint32_t texture_offset; /**< Texture offset bitfield */
   int sampler;
@@ -293,7 +293,8 @@ public:
 					   fs_reg reg);

   exec_list VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
-                                        fs_reg offset);
+                                        fs_reg varying_offset,
+                                        uint32_t const_offset);

   bool run();
   void setup_payload_gen4();
@@ -439,6 +440,7 @@ public:
    * uniform index.
    */
   int *params_remap;
+   int nr_params_remap;

   struct hash_table *variable_ht;
   fs_reg frag_depth;
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -66,6 +66,7 @@ is_expression(const fs_inst *const inst)
   case BRW_OPCODE_LINE:
   case BRW_OPCODE_PLN:
   case BRW_OPCODE_MAD:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
   case FS_OPCODE_CINTERP:
   case FS_OPCODE_LINTERP:
      return true;
@@ -126,19 +127,41 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 	     */
 	    bool no_existing_temp = entry->tmp.file == BAD_FILE;
 	    if (no_existing_temp) {
-	       entry->tmp = fs_reg(this, glsl_type::float_type);
-	       entry->tmp.type = inst->dst.type;
+               int written = entry->generator->regs_written;

-	       fs_inst *copy = new(ralloc_parent(inst))
-		  fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp);
-	       entry->generator->insert_after(copy);
-	       entry->generator->dst = entry->tmp;
+               fs_reg orig_dst = entry->generator->dst;
+               fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
+                                   orig_dst.type);
+               entry->tmp = tmp;
+               entry->generator->dst = tmp;
+
+               for (int i = 0; i < written; i++) {
+                  fs_inst *copy = MOV(orig_dst, tmp);
+                  copy->force_writemask_all =
+                     entry->generator->force_writemask_all;
+                  entry->generator->insert_after(copy);
+
+                  orig_dst.reg_offset++;
+                  tmp.reg_offset++;
+               }
 	    }

 	    /* dest <- temp */
-	    fs_inst *copy = new(ralloc_parent(inst))
-	       fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp);
-	    inst->replace_with(copy);
+            int written = inst->regs_written;
+            assert(written == entry->generator->regs_written);
+            assert(inst->dst.type == entry->tmp.type);
+            fs_reg dst = inst->dst;
+            fs_reg tmp = entry->tmp;
+            fs_inst *copy = NULL;
+            for (int i = 0; i < written; i++) {
+               copy = MOV(dst, tmp);
+               copy->force_writemask_all = inst->force_writemask_all;
+               inst->insert_before(copy);
+
+               dst.reg_offset++;
+               tmp.reg_offset++;
+            }
+            inst->remove();

 	    /* Appending an instruction may have changed our bblock end. */
 	    if (inst == block->end) {
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -469,6 +469,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
    */
   if (inst->texture_offset) {
      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
      /* Explicitly set up the message header by copying g0 to the MRF. */
      brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
 	  * variable, and thus qualify for being in def[].
 	  */
 	 if (inst->dst.file == GRF &&
-	     inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
+	     inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
 	     !inst->predicate &&
 	     !inst->force_uncompressed &&
 	     !inst->force_sechalf) {
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -549,7 +549,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
      }

      if (inst->dst.file == GRF) {
-	 spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
+	 spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;

         if (inst->dst.smear >= 0) {
            no_spill[inst->dst.reg] = true;
@@ -618,7 +618,7 @@ fs_visitor::spill_reg(int spill_reg)
 	  inst->dst.reg == spill_reg) {
         int subset_spill_offset = (spill_offset +
                                    REG_SIZE * inst->dst.reg_offset);
-         inst->dst.reg = virtual_grf_alloc(inst->regs_written());
+         inst->dst.reg = virtual_grf_alloc(inst->regs_written);
         inst->dst.reg_offset = 0;

 	 /* If our write is going to affect just part of the
@@ -627,7 +627,7 @@ fs_visitor::spill_reg(int spill_reg)
 	  */
 	 if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
            fs_reg unspill_reg = inst->dst;
-            for (int chan = 0; chan < inst->regs_written(); chan++) {
+            for (int chan = 0; chan < inst->regs_written; chan++) {
               emit_unspill(inst, unspill_reg,
                            subset_spill_offset + REG_SIZE * chan);
               unspill_reg.reg_offset++;
@@ -640,7 +640,7 @@ fs_visitor::spill_reg(int spill_reg)
 	 spill_src.negate = false;
 	 spill_src.smear = -1;

-	 for (int chan = 0; chan < inst->regs_written(); chan++) {
+	 for (int chan = 0; chan < inst->regs_written; chan++) {
 	    fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
 						       reg_null_f, spill_src);
 	    spill_src.reg_offset++;
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -510,7 +510,7 @@ instruction_scheduler::calculate_deps()
      /* write-after-write deps. */
      if (inst->dst.file == GRF) {
         if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+            for (int r = 0; r < inst->regs_written * reg_width; r++) {
               add_dep(last_grf_write[inst->dst.reg + r], n);
               last_grf_write[inst->dst.reg + r] = n;
            }
@@ -617,7 +617,7 @@ instruction_scheduler::calculate_deps()
       */
      if (inst->dst.file == GRF) {
         if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++)
+            for (int r = 0; r < inst->regs_written * reg_width; r++)
               last_grf_write[inst->dst.reg + r] = n;
         } else {
            last_grf_write[inst->dst.reg] = n;
@@ -716,7 +716,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
            schedule_node *n = (schedule_node *)node;

            chosen = n;
-            if (chosen->inst->regs_written() <= 1)
+            if (chosen->inst->regs_written <= 1)
               break;
         }

--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -237,7 +237,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
    * src, generate a saturated MOV
    */
   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (!modify || modify->regs_written() != 1) {
+   if (!modify || modify->regs_written != 1) {
      this->result = fs_reg(this, ir->type);
      fs_inst *inst = emit(MOV(this->result, src));
      inst->saturate = true;
@@ -626,9 +626,8 @@ fs_visitor::visit(ir_expression *ir)
         emit(SHR(base_offset, op[1], fs_reg(2)));

         for (int i = 0; i < ir->type->vector_elements; i++) {
-            fs_reg offset = fs_reg(this, glsl_type::int_type);
-            emit(ADD(offset, base_offset, fs_reg(i)));
-            emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index, offset));
+            emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
+                                            base_offset, i));

            if (ir->type->base_type == GLSL_TYPE_BOOL)
               emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));
@@ -718,7 +717,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
   /* If last_rhs_inst wrote a different number of components than our LHS,
    * we can't safely rewrite it.
    */
-   if (virtual_grf_sizes[dst.reg] != modify->regs_written())
+   if (virtual_grf_sizes[dst.reg] != modify->regs_written)
      return false;

   /* Success!  Rewrite the instruction. */
@@ -888,11 +887,10 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
       * this weirdness around to the expected layout.
       */
      orig_dst = dst;
-      const glsl_type *vec_type =
-	 glsl_type::get_instance(ir->type->base_type, 4, 1);
-      dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
-      dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
-			       : BRW_REGISTER_TYPE_F;
+      dst = fs_reg(GRF, virtual_grf_alloc(8),
+                   (intel->is_g4x ?
+                    brw_type_for_base_type(ir->type) :
+                    BRW_REGISTER_TYPE_F));
   }

   fs_inst *inst = NULL;
@@ -919,6 +917,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;
+   inst->regs_written = simd16 ? 8 : 4;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
@@ -1048,6 +1047,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
+   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
@@ -1178,6 +1178,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
+   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
@@ -2320,6 +2321,9 @@ fs_visitor::fs_visitor(struct brw_context *brw,
   this->virtual_grf_use = NULL;
   this->live_intervals_valid = false;

+   this->params_remap = NULL;
+   this->nr_params_remap = 0;
+
   this->force_uncompressed_stack = 0;
   this->force_sechalf_stack = 0;

--- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
@@ -28,12 +28,14 @@
 #include "glsl/ir.h"
 #include "glsl/ir_builder.h"
 #include "program/prog_instruction.h"
+#include "brw_context.h"

 using namespace ir_builder;

 class lower_texture_grad_visitor : public ir_hierarchical_visitor {
 public:
-   lower_texture_grad_visitor()
+   lower_texture_grad_visitor(bool has_sample_d_c)
+      : has_sample_d_c(has_sample_d_c)
   {
      progress = false;
   }
@@ -42,6 +44,7 @@ public:


   bool progress;
+   bool has_sample_d_c;

 private:
   void emit(ir_variable *, ir_rvalue *);
@@ -75,6 +78,7 @@ txs_type(const glsl_type *type)
      break;
   default:
      assert(!"Should not get here: invalid sampler dimensionality");
+      dims = 2;
   }

   if (type->sampler_array)
@@ -90,6 +94,22 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
   if (ir->op != ir_txd || !ir->shadow_comparitor)
      return visit_continue;

+   /* Lower textureGrad() with samplerCubeShadow even if we have the sample_d_c
+    * message.  GLSL provides gradients for the 'r' coordinate.  Unfortunately:
+    *
+    * From the Ivybridge PRM, Volume 4, Part 1, sample_d message description:
+    * "The r coordinate contains the faceid, and the r gradients are ignored
+    *  by hardware."
+    *
+    * We likely need to do a similar treatment for samplerCube and
+    * samplerCubeArray, but we have insufficient testing for that at the moment.
+    */
+   bool need_lowering = !has_sample_d_c ||
+      ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE;
+
+   if (!need_lowering)
+      return visit_continue;
+
   void *mem_ctx = ralloc_parent(ir);

   const glsl_type *grad_type = ir->lod_info.grad.dPdx->type;
@@ -145,9 +165,11 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
 extern "C" {

 bool
-brw_lower_texture_gradients(struct exec_list *instructions)
+brw_lower_texture_gradients(struct intel_context *intel,
+                            struct exec_list *instructions)
 {
-   lower_texture_grad_visitor v;
+   bool has_sample_d_c = intel->gen >= 8 || intel->is_haswell;
+   lower_texture_grad_visitor v(has_sample_d_c);

   visit_list_elements(&v, instructions);

--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -540,7 +540,8 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw)
   }
 }

-static void emit_depthbuffer(struct brw_context *brw)
+void
+brw_emit_depthbuffer(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
@@ -553,18 +554,17 @@ static void emit_depthbuffer(struct brw_context *brw)
   struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
-   unsigned int len;
   bool separate_stencil = false;
+   uint32_t depth_surface_type = BRW_SURFACE_NULL;
+   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
+   uint32_t depth_offset = 0;
+   uint32_t width = 1, height = 1;

-   if (stencil_mt && stencil_mt->format == MESA_FORMAT_S8)
-      separate_stencil = true;
+   if (stencil_mt) {
+      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;

-   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
-    * non-pipelined state that will need the PIPE_CONTROL workaround.
-    */
-   if (intel->gen == 6) {
-      intel_emit_post_sync_nonzero_flush(intel);
-      intel_emit_depth_stall_flushes(intel);
+      /* Gen7 supports only separate stencil */
+      assert(separate_stencil || intel->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
@@ -575,31 +575,33 @@ static void emit_depthbuffer(struct brw_context *brw)
      depth_mt = stencil_mt;
   }

-   if (intel->gen >= 6)
-      len = 7;
-   else if (intel->is_g4x || intel->gen == 5)
-      len = 6;
-   else
-      len = 5;
+   if (depth_irb && depth_mt) {
+      struct intel_region *region = depth_mt->region;

-   if (!depth_irb && !separate_stencil) {
-      BEGIN_BATCH(len);
-      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
-      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
-		(BRW_SURFACE_NULL << 29));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
+      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
+       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
+       * depthstencil format.
+       *
+       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
+       * set to the same value. Gen7 and later implicitly always set
+       * Separate_Stencil_Enable; software cannot disable it.
+       */
+      if ((intel->gen < 7 && depth_mt->hiz_mt) || intel->gen >= 7) {
+         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
+      }

-      if (intel->is_g4x || intel->gen >= 5)
-         OUT_BATCH(0);
+      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
+      assert(intel->gen >= 7 || !separate_stencil || hiz_mt);

-      if (intel->gen >= 6)
-	 OUT_BATCH(0);
+      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
+      assert(!hiz_mt || region->tiling == I915_TILING_Y);

-      ADVANCE_BATCH();
-
-   } else if (!depth_irb && separate_stencil) {
+      depthbuffer_format = brw_depthbuffer_format(brw);
+      depth_surface_type = BRW_SURFACE_2D;
+      depth_offset = brw->depthstencil.depth_offset;
+      width = depth_irb->Base.Base.Width;
+      height = depth_irb->Base.Base.Height;
+   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
@@ -607,81 +609,96 @@ static void emit_depthbuffer(struct brw_context *brw)
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
-       * Enable the hiz bit because it and the separate stencil bit must have
-       * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
-       * 1.21 "Separate Stencil Enable":
-       *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
-       *     Enable must also be enabled.
-       *
-       *     [DevGT]: This field must be set to the same value (enabled or
-       *     disabled) as Hierarchical Depth Buffer Enable
-       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(intel->has_separate_stencil);

-      BEGIN_BATCH(len);
-      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
-      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
-	        (1 << 21) | /* separate stencil enable */
-	        (1 << 22) | /* hiz enable */
-	        (BRW_TILEWALK_YMAJOR << 26) |
-	        (1 << 27) | /* tiled surface */
-	        (BRW_SURFACE_2D << 29));
-      OUT_BATCH(0);
-      OUT_BATCH(((stencil_irb->Base.Base.Width + tile_x - 1) << 6) |
-	         (stencil_irb->Base.Base.Height + tile_y - 1) << 19);
-      OUT_BATCH(0);
-
-      if (intel->is_g4x || intel->gen >= 5)
-         OUT_BATCH(tile_x | (tile_y << 16));
-      else
-	 assert(tile_x == 0 && tile_y == 0);
-
-      if (intel->gen >= 6)
-	 OUT_BATCH(0);
-
-      ADVANCE_BATCH();
-
-   } else {
-      struct intel_region *region = depth_mt->region;
-
-      /* If using separate stencil, hiz must be enabled. */
-      assert(!separate_stencil || hiz_mt);
-
-      assert(intel->gen < 6 || region->tiling == I915_TILING_Y);
-      assert(!hiz_mt || region->tiling == I915_TILING_Y);
-
-      BEGIN_BATCH(len);
-      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
-      OUT_BATCH((region->pitch - 1) |
-		(brw_depthbuffer_format(brw) << 18) |
-		((hiz_mt ? 1 : 0) << 21) | /* separate stencil enable */
-		((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
-		(BRW_TILEWALK_YMAJOR << 26) |
-		((region->tiling != I915_TILING_NONE) << 27) |
-		(BRW_SURFACE_2D << 29));
-      OUT_RELOC(region->bo,
-		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		brw->depthstencil.depth_offset);
-      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
-		(((depth_irb->Base.Base.Width + tile_x) - 1) << 6) |
-		(((depth_irb->Base.Base.Height + tile_y) - 1) << 19));
-      OUT_BATCH(0);
-
-      if (intel->is_g4x || intel->gen >= 5)
-         OUT_BATCH(tile_x | (tile_y << 16));
-      else
-	 assert(tile_x == 0 && tile_y == 0);
-
-      if (intel->gen >= 6)
-	 OUT_BATCH(0);
-
-      ADVANCE_BATCH();
+      depth_surface_type = BRW_SURFACE_2D;
+      width = stencil_irb->Base.Base.Width;
+      height = stencil_irb->Base.Base.Height;
   }

+   intel->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
+                                      depthbuffer_format, depth_surface_type,
+                                      stencil_mt, hiz_mt, separate_stencil,
+                                      width, height, tile_x, tile_y);
+}
+
+void
+brw_emit_depth_stencil_hiz(struct brw_context *brw,
+                           struct intel_mipmap_tree *depth_mt,
+                           uint32_t depth_offset, uint32_t depthbuffer_format,
+                           uint32_t depth_surface_type,
+                           struct intel_mipmap_tree *stencil_mt,
+                           struct intel_mipmap_tree *hiz_mt,
+                           bool separate_stencil, uint32_t width,
+                           uint32_t height, uint32_t tile_x, uint32_t tile_y)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Enable the hiz bit if we're doing separate stencil, because it and the
+    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
+    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
+    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
+    *     Enable must also be enabled.
+    *
+    *     [DevGT]: This field must be set to the same value (enabled or
+    *     disabled) as Hierarchical Depth Buffer Enable
+    */
+   bool enable_hiz_ss = hiz_mt || separate_stencil;
+
+
+   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
+    * non-pipelined state that will need the PIPE_CONTROL workaround.
+    */
+   if (intel->gen == 6) {
+      intel_emit_post_sync_nonzero_flush(intel);
+      intel_emit_depth_stall_flushes(intel);
+   }
+
+   unsigned int len;
+   if (intel->gen >= 6)
+      len = 7;
+   else if (intel->is_g4x || intel->gen == 5)
+      len = 6;
+   else
+      len = 5;
+
+   BEGIN_BATCH(len);
+   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
+   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
+             (depthbuffer_format << 18) |
+             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
+             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
+             (BRW_TILEWALK_YMAJOR << 26) |
+             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
+              << 27) |
+             (depth_surface_type << 29));
+
+   if (depth_mt) {
+      OUT_RELOC(depth_mt->region->bo,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		depth_offset);
+   } else {
+      OUT_BATCH(0);
+   }
+
+   OUT_BATCH(((width + tile_x - 1) << 6) |
+             ((height + tile_y - 1) << 19));
+   OUT_BATCH(0);
+
+   if (intel->is_g4x || intel->gen >= 5)
+      OUT_BATCH(tile_x | (tile_y << 16));
+   else
+      assert(tile_x == 0 && tile_y == 0);
+
+   if (intel->gen >= 6)
+      OUT_BATCH(0);
+
+   ADVANCE_BATCH();
+
   if (hiz_mt || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
@@ -749,7 +766,7 @@ static void emit_depthbuffer(struct brw_context *brw)
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
 		GEN5_DEPTH_CLEAR_VALID |
 		(2 - 2));
-      OUT_BATCH(depth_irb ? depth_irb->mt->depth_clear_value : 0);
+      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
 }
@@ -760,7 +777,7 @@ const struct brw_tracked_state brw_depthbuffer = {
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
-   .emit = emit_depthbuffer,
+   .emit = brw_emit_depthbuffer,
 };


--- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c
+++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
@@ -27,6 +27,7 @@

 #include "main/imports.h"
 #include "main/bufferobj.h"
+#include "main/varray.h"

 #include "brw_context.h"
 #include "brw_defines.h"
@@ -36,29 +37,29 @@

 /**
 * Check if the hardware's cut index support can handle the primitive
- * restart index value.
+ * restart index value (pre-Haswell only).
 */
 static bool
 can_cut_index_handle_restart_index(struct gl_context *ctx,
                                   const struct _mesa_index_buffer *ib)
 {
-   struct intel_context *intel = intel_context(ctx);
-
-   /* Haswell supports an arbitrary cut index. */
-   if (intel->is_haswell)
+   /* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on
+    * the index buffer type, which corresponds exactly to the hardware.
+    */
+   if (ctx->Array.PrimitiveRestartFixedIndex)
      return true;

   bool cut_index_will_work;

   switch (ib->type) {
   case GL_UNSIGNED_BYTE:
-      cut_index_will_work = (ctx->Array._RestartIndex & 0xff) == 0xff;
+      cut_index_will_work = ctx->Array.RestartIndex == 0xff;
      break;
   case GL_UNSIGNED_SHORT:
-      cut_index_will_work = (ctx->Array._RestartIndex & 0xffff) == 0xffff;
+      cut_index_will_work = ctx->Array.RestartIndex == 0xffff;
      break;
   case GL_UNSIGNED_INT:
-      cut_index_will_work = ctx->Array._RestartIndex == 0xffffffff;
+      cut_index_will_work = ctx->Array.RestartIndex == 0xffffffff;
      break;
   default:
      cut_index_will_work = false;
@@ -78,6 +79,7 @@ can_cut_index_handle_prims(struct gl_context *ctx,
                           GLuint nr_prims,
                           const struct _mesa_index_buffer *ib)
 {
+   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);

   if (brw->sol.counting_primitives_generated ||
@@ -90,6 +92,10 @@ can_cut_index_handle_prims(struct gl_context *ctx,
      return false;
   }

+   /* Otherwise Haswell can do it all. */
+   if (intel->is_haswell)
+      return true;
+
   if (!can_cut_index_handle_restart_index(ctx, ib)) {
      /* The primitive restart index can't be handled, so take
       * the software path
@@ -198,16 +204,29 @@ haswell_upload_cut_index(struct brw_context *brw)
   const unsigned cut_index_setting =
      ctx->Array._PrimitiveRestart ? HSW_CUT_INDEX_ENABLE : 0;

+   /* BRW_NEW_INDEX_BUFFER */
+   unsigned cut_index;
+   if (brw->ib.ib) {
+      cut_index = _mesa_primitive_restart_index(ctx, brw->ib.type);
+   } else {
+      /* There's no index buffer, but primitive restart may still apply
+       * to glDrawArrays and such.  FIXED_INDEX mode only applies to drawing
+       * operations that use an index buffer, so we can ignore it and use
+       * the GL restart index directly.
+       */
+      cut_index = ctx->Array.RestartIndex;
+   }
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_VF << 16 | cut_index_setting | (2 - 2));
-   OUT_BATCH(ctx->Array._RestartIndex);
+   OUT_BATCH(cut_index);
   ADVANCE_BATCH();
 }

 const struct brw_tracked_state haswell_cut_index = {
   .dirty = {
      .mesa  = _NEW_TRANSFORM,
-      .brw   = 0,
+      .brw   = BRW_NEW_INDEX_BUFFER,
      .cache = 0,
   },
   .emit = haswell_upload_cut_index,
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -164,8 +164,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 	 lower_if_to_cond_assign(shader->ir, 16);

      do_lower_texture_projection(shader->ir);
-      if (intel->gen < 8 && !intel->is_haswell)
-         brw_lower_texture_gradients(shader->ir);
+      brw_lower_texture_gradients(intel, shader->ir);
      do_vec_index_to_cond_assign(shader->ir);
      brw_do_cubemap_normalize(shader->ir);
      lower_noise(shader->ir);
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -187,11 +187,6 @@ void *brw_state_batch(struct brw_context *brw,
 void gen4_init_vtable_surface_functions(struct brw_context *brw);
 uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
 uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
-void brw_create_constant_surface(struct brw_context *brw,
-				 drm_intel_bo *bo,
-				 uint32_t offset,
-				 int width,
-				 uint32_t *out_offset);

 uint32_t brw_format_for_mesa_format(gl_format mesa_format);

--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -239,6 +239,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      return 1;
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
   case SHADER_OPCODE_POW:
      return 2;
   case VS_OPCODE_URB_WRITE:
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -335,6 +335,8 @@ vec4_generator::generate_tex(vec4_instruction *inst,
    */
   if (inst->texture_offset) {
      /* Explicitly set up the message header by copying g0 to the MRF. */
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
 	         retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

@@ -344,7 +346,7 @@ vec4_generator::generate_tex(vec4_instruction *inst,
 	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
 		     BRW_REGISTER_TYPE_UD),
 	      brw_imm_uw(inst->texture_offset));
-      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_pop_insn_state(p);
   } else if (inst->header_present) {
      /* Set up an implied move from g0 to the MRF. */
      src = brw_vec8_grf(0, 0);
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -2194,7 +2194,7 @@ vec4_visitor::visit(ir_texture *ir)
      emit(MOV(dst_reg(MRF, param_base, ir->coordinate->type, zero_mask),
 	       src_reg(0)));
      /* Load the shadow comparitor */
-      if (ir->shadow_comparitor) {
+      if (ir->shadow_comparitor && ir->op != ir_txd) {
 	 emit(MOV(dst_reg(MRF, param_base + 1, ir->shadow_comparitor->type,
 			  WRITEMASK_X),
 		  shadow_comparitor));
@@ -2231,12 +2231,18 @@ vec4_visitor::visit(ir_texture *ir)
 	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), dPdy));
 	    inst->mlen++;

-	    if (ir->type->vector_elements == 3) {
+	    if (ir->type->vector_elements == 3 || ir->shadow_comparitor) {
 	       dPdx.swizzle = BRW_SWIZZLE_ZZZZ;
 	       dPdy.swizzle = BRW_SWIZZLE_ZZZZ;
 	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), dPdx));
 	       emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), dPdy));
 	       inst->mlen++;
+
+               if (ir->shadow_comparitor) {
+                  emit(MOV(dst_reg(MRF, param_base + 2,
+                                   ir->shadow_comparitor->type, WRITEMASK_Z),
+                           shadow_comparitor));
+               }
 	    }
 	 } else /* intel->gen == 4 */ {
 	    emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_XYZ), dPdx));
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -68,9 +68,9 @@ brw_upload_vs_pull_constants(struct brw_context *brw)

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo_unreference(brw->vs.const_bo);
+   uint32_t size = brw->vs.prog_data->nr_pull_params * 4;
   brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
-					 brw->vs.prog_data->nr_pull_params * 4,
-					 64);
+					 size, 64);

   drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
   for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) {
@@ -90,8 +90,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
   drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);

   const int surf = SURF_INDEX_VERT_CONST_BUFFER;
-   intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0,
-				       ALIGN(brw->vs.prog_data->nr_pull_params, 4) / 4,
+   intel->vtbl.create_constant_surface(brw, brw->vs.const_bo, 0, size,
 				       &brw->vs.surf_offset[surf]);

   brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -267,7 +267,9 @@ void brwInitVtbl( struct brw_context *brw )
   assert(brw->intel.gen >= 4);
   if (brw->intel.gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
+      brw->intel.vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->intel.gen >= 4) {
      gen4_init_vtable_surface_functions(brw);
+      brw->intel.vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }
 }
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -899,15 +899,17 @@ brw_update_texture_surface(struct gl_context *ctx,
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
-void
+static void
 brw_create_constant_surface(struct brw_context *brw,
 			    drm_intel_bo *bo,
 			    uint32_t offset,
-			    int width,
+			    uint32_t size,
 			    uint32_t *out_offset)
 {
   struct intel_context *intel = &brw->intel;
-   const GLint w = width - 1;
+   uint32_t stride = 16;
+   uint32_t elements = ALIGN(size, stride) / stride;
+   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
@@ -926,7 +928,7 @@ brw_create_constant_surface(struct brw_context *brw,
 	      ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
-	      (16 - 1) << BRW_SURFACE_PITCH_SHIFT); /* ignored */
+	      (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;
@@ -1073,8 +1075,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

-   intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0,
-				       ALIGN(brw->wm.prog_data->nr_pull_params, 4) / 4,
+   intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
 				       &brw->wm.surf_offset[surf_index]);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
@@ -1426,11 +1427,8 @@ brw_upload_ubo_surfaces(struct brw_context *brw,
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * glBindBufferBase wants and be a correct implementation.
       */
-      int size = bo->size - binding->Offset;
-      size = ALIGN(size, 16) / 16; /* The interface takes a number of vec4s */
-
      intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
-					  size,
+					  bo->size - binding->Offset,
 					  &surf_offsets[i]);
   }

--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -594,6 +594,15 @@ gen6_blorp_emit_vs_disable(struct brw_context *brw,
      intel_emit_post_sync_nonzero_flush(intel);
   }

+   /* Disable the push constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
   OUT_BATCH(0);
@@ -615,6 +624,15 @@ gen6_blorp_emit_gs_disable(struct brw_context *brw,
 {
   struct intel_context *intel = &brw->intel;

+   /* Disable all the constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
   OUT_BATCH(0);
@@ -797,6 +815,21 @@ gen6_blorp_emit_constant_ps(struct brw_context *brw,
   ADVANCE_BATCH();
 }

+static void
+gen6_blorp_emit_constant_ps_disable(struct brw_context *brw,
+                                    const brw_blorp_params *params)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Disable the push constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}

 /**
 * 3DSTATE_BINDING_TABLE_POINTERS
@@ -1074,6 +1107,8 @@ gen6_blorp_exec(struct intel_context *intel,
   gen6_blorp_emit_sf_config(brw, params);
   if (params->use_wm_prog)
      gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
+   else
+      gen6_blorp_emit_constant_ps_disable(brw, params);
   gen6_blorp_emit_wm_config(brw, params, prog_offset, prog_data);
   if (params->use_wm_prog)
      gen6_blorp_emit_binding_table_pointers(brw, params, wm_bind_bo_offset);
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -33,6 +33,7 @@
 #include "main/macros.h"
 #include "main/enums.h"
 #include "main/glformats.h"
+#include "main/stencil.h"

 static void
 gen6_upload_blend_state(struct brw_context *brw)
@@ -251,8 +252,8 @@ gen6_upload_color_calc_state(struct brw_context *brw)
   UNCLAMPED_FLOAT_TO_UBYTE(cc->cc1.alpha_ref_fi.ui, ctx->Color.AlphaRef);

   /* _NEW_STENCIL */
-   cc->cc0.stencil_ref = ctx->Stencil.Ref[0];
-   cc->cc0.bf_stencil_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+   cc->cc0.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
+   cc->cc0.bf_stencil_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);

   /* _NEW_COLOR */
   cc->constant_r = ctx->Color.BlendColorUnclamped[0];
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -147,7 +147,7 @@ gen6_update_sol_indices(struct brw_context *brw)
 const struct brw_tracked_state gen6_sol_indices = {
   .dirty = {
      .mesa = 0,
-      .brw = (BRW_NEW_BATCH |
+      .brw = (BRW_NEW_CONTEXT |
              BRW_NEW_SOL_INDICES),
      .cache = 0
   },
@@ -159,6 +159,7 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
 			     struct gl_transform_feedback_object *obj)
 {
   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = &brw->intel;
   const struct gl_shader_program *vs_prog =
      ctx->Shader.CurrentVertexProgram;
   const struct gl_transform_feedback_info *linked_xfb_info =
@@ -180,6 +181,14 @@ brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
   brw->sol.svbi_0_starting_index = 0;
   brw->sol.svbi_0_max_index = max_index;
   brw->sol.offset_0_batch_start = 0;
+
+   if (intel->gen >= 7) {
+      /* Ask the kernel to reset the SO offsets for any previous transform
+       * feedback, so we start at the start of the user's buffer. (note: these
+       * are not the query counters)
+       */
+      intel->batch.needs_sol_reset = true;
+   }
 }

 void
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -54,7 +54,7 @@ gen6_upload_urb( struct brw_context *brw )
   int total_urb_size = brw->urb.size * 1024; /* in bytes */

   /* CACHE_NEW_VS_PROG */
-   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+   unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);

   /* We use the same VUE layout for VS outputs and GS outputs (as it's what
    * the SF and Clipper expect), so we can simply make the GS URB entry size
@@ -62,14 +62,14 @@ gen6_upload_urb( struct brw_context *brw )
    * where we have few vertex attributes and a lot of varyings, since the VS
    * size is determined by the larger of the two.  For now, it's safe.
    */
-   brw->urb.gs_size = brw->urb.vs_size;
+   unsigned gs_size = vs_size;

   /* Calculate how many entries fit in each stage's section of the URB */
   if (brw->gs.prog_active) {
-      nr_vs_entries = (total_urb_size/2) / (brw->urb.vs_size * 128);
-      nr_gs_entries = (total_urb_size/2) / (brw->urb.gs_size * 128);
+      nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
+      nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
   } else {
-      nr_vs_entries = total_urb_size / (brw->urb.vs_size * 128);
+      nr_vs_entries = total_urb_size / (vs_size * 128);
      nr_gs_entries = 0;
   }

@@ -87,14 +87,14 @@ gen6_upload_urb( struct brw_context *brw )
   assert(brw->urb.nr_vs_entries >= 24);
   assert(brw->urb.nr_vs_entries % 4 == 0);
   assert(brw->urb.nr_gs_entries % 4 == 0);
-   assert(brw->urb.vs_size < 5);
-   assert(brw->urb.gs_size < 5);
+   assert(vs_size < 5);
+   assert(gs_size < 5);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
-   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
+   OUT_BATCH(((vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
 	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
-   OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+   OUT_BATCH(((gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
 	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
   ADVANCE_BATCH();

--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -276,6 +276,37 @@ gen7_blorp_emit_sampler_state(struct brw_context *brw,
 }


+/* 3DSTATE_VS
+ *
+ * Disable vertex shader.
+ */
+static void
+gen7_blorp_emit_vs_disable(struct brw_context *brw,
+                           const brw_blorp_params *params)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(6);
+   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+
 /* 3DSTATE_HS
 *
 * Disable the hull shader.
@@ -286,6 +317,16 @@ gen7_blorp_emit_hs_disable(struct brw_context *brw,
 {
   struct intel_context *intel = &brw->intel;

+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
   OUT_BATCH(0);
@@ -327,6 +368,16 @@ gen7_blorp_emit_ds_disable(struct brw_context *brw,
 {
   struct intel_context *intel = &brw->intel;

+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
   OUT_BATCH(0);
@@ -337,6 +388,36 @@ gen7_blorp_emit_ds_disable(struct brw_context *brw,
   ADVANCE_BATCH();
 }

+/* 3DSTATE_GS
+ *
+ * Disable the geometry shader.
+ */
+static void
+gen7_blorp_emit_gs_disable(struct brw_context *brw,
+                           const brw_blorp_params *params)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}

 /* 3DSTATE_STREAMOUT
 *
@@ -573,6 +654,22 @@ gen7_blorp_emit_constant_ps(struct brw_context *brw,
   ADVANCE_BATCH();
 }

+static void
+gen7_blorp_emit_constant_ps_disable(struct brw_context *brw,
+                                    const brw_blorp_params *params)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}

 static void
 gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
@@ -777,11 +874,11 @@ gen7_blorp_exec(struct intel_context *intel,
                                       wm_surf_offset_texture);
      sampler_offset = gen7_blorp_emit_sampler_state(brw, params);
   }
-   gen6_blorp_emit_vs_disable(brw, params);
+   gen7_blorp_emit_vs_disable(brw, params);
   gen7_blorp_emit_hs_disable(brw, params);
   gen7_blorp_emit_te_disable(brw, params);
   gen7_blorp_emit_ds_disable(brw, params);
-   gen6_blorp_emit_gs_disable(brw, params);
+   gen7_blorp_emit_gs_disable(brw, params);
   gen7_blorp_emit_streamout_disable(brw, params);
   gen6_blorp_emit_clip_disable(brw, params);
   gen7_blorp_emit_sf_config(brw, params);
@@ -791,6 +888,8 @@ gen7_blorp_exec(struct intel_context *intel,
                                                wm_bind_bo_offset);
      gen7_blorp_emit_sampler_state_pointers_ps(brw, params, sampler_offset);
      gen7_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
+   } else {
+      gen7_blorp_emit_constant_ps_disable(brw, params);
   }
   gen7_blorp_emit_ps_config(brw, params, prog_offset, prog_data);
   gen7_blorp_emit_cc_viewport(brw, params);
--- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
@@ -29,77 +29,46 @@
 #include "brw_state.h"
 #include "brw_defines.h"

-static void emit_depthbuffer(struct brw_context *brw)
+void
+gen7_emit_depth_stencil_hiz(struct brw_context *brw,
+                            struct intel_mipmap_tree *depth_mt,
+                            uint32_t depth_offset, uint32_t depthbuffer_format,
+                            uint32_t depth_surface_type,
+                            struct intel_mipmap_tree *stencil_mt,
+                            struct intel_mipmap_tree *hiz_mt,
+                            bool separate_stencil, uint32_t width,
+                            uint32_t height, uint32_t tile_x, uint32_t tile_y)
 {
   struct intel_context *intel = &brw->intel;
   struct gl_context *ctx = &intel->ctx;
-   struct gl_framebuffer *fb = ctx->DrawBuffer;
-
-   /* _NEW_BUFFERS */
-   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
-   struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
-   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
-   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
-   struct intel_mipmap_tree *hiz_mt = brw->depthstencil.hiz_mt;
-   uint32_t tile_x = brw->depthstencil.tile_x;
-   uint32_t tile_y = brw->depthstencil.tile_y;
-
-   /* Gen7 only supports separate stencil */
-   assert(!stencil_mt || stencil_mt->format == MESA_FORMAT_S8);
-   assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));

   intel_emit_depth_stall_flushes(intel);

-   if (depth_mt == NULL) {
-      uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
-      uint32_t dw3 = 0;
+   /* _NEW_DEPTH, _NEW_STENCIL */
+   BEGIN_BATCH(7);
+   OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
+             (depthbuffer_format << 18) |
+             ((hiz_mt ? 1 : 0) << 22) |
+             ((stencil_mt != NULL && ctx->Stencil._WriteEnabled) << 27) |
+             ((ctx->Depth.Mask != 0) << 28) |
+             (depth_surface_type << 29));

-      if (stencil_mt == NULL) {
-	 dw1 |= (BRW_SURFACE_NULL << 29);
-      } else {
-	 /* _NEW_STENCIL: enable stencil buffer writes */
-	 dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
-
-	 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
-	 dw1 |= (BRW_SURFACE_2D << 29);
-	 dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
-	       ((srb->Base.Base.Height + tile_y - 1) << 18);
-      }
-
-      BEGIN_BATCH(7);
-      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
-      OUT_BATCH(dw1);
-      OUT_BATCH(0);
-      OUT_BATCH(dw3);
-      OUT_BATCH(0);
-      OUT_BATCH(tile_x | (tile_y << 16));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-   } else {
-      struct intel_region *region = depth_mt->region;
-
-      assert(region->tiling == I915_TILING_Y);
-
-      /* _NEW_DEPTH, _NEW_STENCIL */
-      BEGIN_BATCH(7);
-      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
-      OUT_BATCH((region->pitch - 1) |
-		(brw_depthbuffer_format(brw) << 18) |
-		((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
-		((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
-		((ctx->Depth.Mask != 0) << 28) |
-		(BRW_SURFACE_2D << 29));
-      OUT_RELOC(region->bo,
+   if (depth_mt) {
+      OUT_RELOC(depth_mt->region->bo,
 	        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		brw->depthstencil.depth_offset);
-      OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) |
-                (((drb->Base.Base.Height + tile_y) - 1) << 18));
+		depth_offset);
+   } else {
      OUT_BATCH(0);
-      OUT_BATCH(tile_x | (tile_y << 16));
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
   }

+   OUT_BATCH(((width + tile_x - 1) << 4) |
+             ((height + tile_y - 1) << 18));
+   OUT_BATCH(0);
+   OUT_BATCH(tile_x | (tile_y << 16));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
   if (hiz_mt == NULL) {
      BEGIN_BATCH(3);
      OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
@@ -166,5 +135,5 @@ const struct brw_tracked_state gen7_depthbuffer = {
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
-   .emit = emit_depthbuffer,
+   .emit = brw_emit_depthbuffer,
 };
--- a/src/mesa/drivers/dri/i965/gen7_sol_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c
@@ -82,12 +82,14 @@ upload_3dstate_so_buffers(struct brw_context *brw)
      end = ALIGN(start + xfb_obj->Size[i], 4);
      assert(end <= bo->size);

-      /* Offset the starting offset by the current vertex index into the
-       * feedback buffer, offset register is always set to 0 at the start of the
-       * batchbuffer.
+      /* If we don't have hardware contexts, then we reset our offsets at the
+       * start of every batch, so we track the number of vertices written in
+       * software and increment our pointers by that many.
       */
-      start += brw->sol.offset_0_batch_start * stride;
-      assert(start <= end);
+      if (!intel->hw_ctx) {
+         start += brw->sol.offset_0_batch_start * stride;
+         assert(start <= end);
+      }

      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_SO_BUFFER << 16 | (4 - 2));
@@ -244,7 +246,11 @@ upload_sol_state(struct brw_context *brw)
      /* CACHE_NEW_VS_PROG */
      upload_3dstate_so_decl_list(brw, &brw->vs.prog_data->vue_map);

-      intel->batch.needs_sol_reset = true;
+      /* If we don't have hardware contexts, then some other client may have
+       * changed the SO write offsets, and we need to rewrite them.
+       */
+      if (!intel->hw_ctx)
+         intel->batch.needs_sol_reset = true;
   }

   /* Finally, set up the SOL stage.  This command must always follow updates to
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -39,29 +39,37 @@
 * +-------------------------------------------------------------+
 *
 * Notably, push constants must be stored at the beginning of the URB
- * space, while entries can be stored anywhere.  Ivybridge has a maximum
- * constant buffer size of 16kB.
+ * space, while entries can be stored anywhere.  Ivybridge and Haswell
+ * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
+ * doubles this (32kB).
 *
 * Currently we split the constant buffer space evenly between VS and FS.
 * This is probably not ideal, but simple.
 *
- * Ivybridge GT1 has 128kB of URB space.
- * Ivybridge GT2 has 256kB of URB space.
+ * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
+ * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
+ * Haswell GT3 has 512kB of URB space.
 *
- * See "Volume 2a: 3D Pipeline," section 1.8.
+ * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
+ * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
 */
 void
 gen7_allocate_push_constants(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
+
+   unsigned size = 8;
+   if (intel->is_haswell && intel->gt == 3)
+      size = 16;
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
-   OUT_BATCH(8);
+   OUT_BATCH(size);
   ADVANCE_BATCH();

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
-   OUT_BATCH(8 | 8 << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
   ADVANCE_BATCH();
 }

@@ -78,13 +86,15 @@ static void
 gen7_upload_urb(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
+   const int push_size_kB = intel->is_haswell && intel->gt == 3 ? 32 : 16;
+
   /* Total space for entries is URB size - 16kB for push constants */
-   int handle_region_size = (brw->urb.size - 16) * 1024; /* bytes */
+   int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */

   /* CACHE_NEW_VS_PROG */
-   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+   unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);

-   int nr_vs_entries = handle_region_size / (brw->urb.vs_size * 64);
+   int nr_vs_entries = handle_region_size / (vs_size * 64);
   if (nr_vs_entries > brw->urb.max_vs_entries)
      nr_vs_entries = brw->urb.max_vs_entries;

@@ -92,7 +102,7 @@ gen7_upload_urb(struct brw_context *brw)
   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8);

   /* URB Starting Addresses are specified in multiples of 8kB. */
-   brw->urb.vs_start = 2; /* skip over push constants */
+   brw->urb.vs_start = push_size_kB / 8; /* skip over push constants */

   assert(brw->urb.nr_vs_entries % 8 == 0);
   assert(brw->urb.nr_gs_entries % 8 == 0);
@@ -100,8 +110,7 @@ gen7_upload_urb(struct brw_context *brw)
   assert(!brw->gs.prog_active);

   gen7_emit_vs_workaround_flush(intel);
-   gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, brw->urb.vs_size,
-                       brw->urb.vs_start);
+   gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start);
 }

 void
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -372,11 +372,13 @@ static void
 gen7_create_constant_surface(struct brw_context *brw,
 			     drm_intel_bo *bo,
 			     uint32_t offset,
-			     int width,
+			     uint32_t size,
 			     uint32_t *out_offset)
 {
   struct intel_context *intel = &brw->intel;
-   const GLint w = width - 1;
+   uint32_t stride = 16;
+   uint32_t elements = ALIGN(size, stride) / stride;
+   const GLint w = elements - 1;

   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    8 * 4, 32, out_offset);
@@ -392,7 +394,7 @@ gen7_create_constant_surface(struct brw_context *brw,
   surf[2] = SET_FIELD(w & 0x7f, GEN7_SURFACE_WIDTH) |
             SET_FIELD((w >> 7) & 0x1fff, GEN7_SURFACE_HEIGHT);
   surf[3] = SET_FIELD((w >> 20) & 0x7f, BRW_SURFACE_DEPTH) |
-             (16 - 1); /* stride between samples */
+             (stride - 1);

   if (intel->is_haswell) {
      surf[7] = SET_FIELD(HSW_SCS_RED,   GEN7_SURFACE_SCS_R) |
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -87,42 +87,72 @@
 #define PCI_CHIP_IVYBRIDGE_S_GT1        0x015a  /* Server */
 #define PCI_CHIP_IVYBRIDGE_S_GT2        0x016a

+#define PCI_CHIP_BAYTRAIL_M_1           0x0F31
+#define PCI_CHIP_BAYTRAIL_M_2           0x0F32
+#define PCI_CHIP_BAYTRAIL_M_3           0x0F33
+#define PCI_CHIP_BAYTRAIL_M_4           0x0157
+#define PCI_CHIP_BAYTRAIL_D             0x0155
+
 #define PCI_CHIP_HASWELL_GT1            0x0402 /* Desktop */
 #define PCI_CHIP_HASWELL_GT2            0x0412
-#define PCI_CHIP_HASWELL_GT2_PLUS       0x0422
+#define PCI_CHIP_HASWELL_GT3            0x0422
 #define PCI_CHIP_HASWELL_M_GT1          0x0406 /* Mobile */
 #define PCI_CHIP_HASWELL_M_GT2          0x0416
-#define PCI_CHIP_HASWELL_M_GT2_PLUS     0x0426
+#define PCI_CHIP_HASWELL_M_GT3          0x0426
 #define PCI_CHIP_HASWELL_S_GT1          0x040A /* Server */
 #define PCI_CHIP_HASWELL_S_GT2          0x041A
-#define PCI_CHIP_HASWELL_S_GT2_PLUS     0x042A
+#define PCI_CHIP_HASWELL_S_GT3          0x042A
+#define PCI_CHIP_HASWELL_B_GT1          0x040B /* Reserved */
+#define PCI_CHIP_HASWELL_B_GT2          0x041B
+#define PCI_CHIP_HASWELL_B_GT3          0x042B
+#define PCI_CHIP_HASWELL_E_GT1          0x040E /* Reserved */
+#define PCI_CHIP_HASWELL_E_GT2          0x041E
+#define PCI_CHIP_HASWELL_E_GT3          0x042E
 #define PCI_CHIP_HASWELL_SDV_GT1        0x0C02 /* Desktop */
 #define PCI_CHIP_HASWELL_SDV_GT2        0x0C12
-#define PCI_CHIP_HASWELL_SDV_GT2_PLUS   0x0C22
+#define PCI_CHIP_HASWELL_SDV_GT3        0x0C22
 #define PCI_CHIP_HASWELL_SDV_M_GT1      0x0C06 /* Mobile */
 #define PCI_CHIP_HASWELL_SDV_M_GT2      0x0C16
-#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
+#define PCI_CHIP_HASWELL_SDV_M_GT3      0x0C26
 #define PCI_CHIP_HASWELL_SDV_S_GT1      0x0C0A /* Server */
 #define PCI_CHIP_HASWELL_SDV_S_GT2      0x0C1A
-#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
+#define PCI_CHIP_HASWELL_SDV_S_GT3      0x0C2A
+#define PCI_CHIP_HASWELL_SDV_B_GT1      0x0C0B /* Reserved */
+#define PCI_CHIP_HASWELL_SDV_B_GT2      0x0C1B
+#define PCI_CHIP_HASWELL_SDV_B_GT3      0x0C2B
+#define PCI_CHIP_HASWELL_SDV_E_GT1      0x0C0E /* Reserved */
+#define PCI_CHIP_HASWELL_SDV_E_GT2      0x0C1E
+#define PCI_CHIP_HASWELL_SDV_E_GT3      0x0C2E
 #define PCI_CHIP_HASWELL_ULT_GT1        0x0A02 /* Desktop */
 #define PCI_CHIP_HASWELL_ULT_GT2        0x0A12
-#define PCI_CHIP_HASWELL_ULT_GT2_PLUS   0x0A22
+#define PCI_CHIP_HASWELL_ULT_GT3        0x0A22
 #define PCI_CHIP_HASWELL_ULT_M_GT1      0x0A06 /* Mobile */
 #define PCI_CHIP_HASWELL_ULT_M_GT2      0x0A16
-#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
+#define PCI_CHIP_HASWELL_ULT_M_GT3      0x0A26
 #define PCI_CHIP_HASWELL_ULT_S_GT1      0x0A0A /* Server */
 #define PCI_CHIP_HASWELL_ULT_S_GT2      0x0A1A
-#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
+#define PCI_CHIP_HASWELL_ULT_S_GT3      0x0A2A
+#define PCI_CHIP_HASWELL_ULT_B_GT1      0x0A0B /* Reserved */
+#define PCI_CHIP_HASWELL_ULT_B_GT2      0x0A1B
+#define PCI_CHIP_HASWELL_ULT_B_GT3      0x0A2B
+#define PCI_CHIP_HASWELL_ULT_E_GT1      0x0A0E /* Reserved */
+#define PCI_CHIP_HASWELL_ULT_E_GT2      0x0A1E
+#define PCI_CHIP_HASWELL_ULT_E_GT3      0x0A2E
 #define PCI_CHIP_HASWELL_CRW_GT1        0x0D02 /* Desktop */
 #define PCI_CHIP_HASWELL_CRW_GT2        0x0D12
-#define PCI_CHIP_HASWELL_CRW_GT2_PLUS   0x0D22
+#define PCI_CHIP_HASWELL_CRW_GT3        0x0D22
 #define PCI_CHIP_HASWELL_CRW_M_GT1      0x0D06 /* Mobile */
 #define PCI_CHIP_HASWELL_CRW_M_GT2      0x0D16
-#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26
+#define PCI_CHIP_HASWELL_CRW_M_GT3      0x0D26
 #define PCI_CHIP_HASWELL_CRW_S_GT1      0x0D0A /* Server */
 #define PCI_CHIP_HASWELL_CRW_S_GT2      0x0D1A
-#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A
+#define PCI_CHIP_HASWELL_CRW_S_GT3      0x0D2A
+#define PCI_CHIP_HASWELL_CRW_B_GT1      0x0D0B /* Reserved */
+#define PCI_CHIP_HASWELL_CRW_B_GT2      0x0D1B
+#define PCI_CHIP_HASWELL_CRW_B_GT3      0x0D2B
+#define PCI_CHIP_HASWELL_CRW_E_GT1      0x0D0E /* Reserved */
+#define PCI_CHIP_HASWELL_CRW_E_GT2      0x0D1E
+#define PCI_CHIP_HASWELL_CRW_E_GT3      0x0D2E

 #define IS_MOBILE(devid)	(devid == PCI_CHIP_I855_GM || \
 				 devid == PCI_CHIP_I915_GM || \
@@ -190,47 +220,80 @@

 #define IS_IVYBRIDGE(devid)     (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))

+#define IS_BAYTRAIL(devid)      (devid == PCI_CHIP_BAYTRAIL_M_1 || \
+                                 devid == PCI_CHIP_BAYTRAIL_M_2 || \
+                                 devid == PCI_CHIP_BAYTRAIL_M_3 || \
+                                 devid == PCI_CHIP_BAYTRAIL_M_4 || \
+                                 devid == PCI_CHIP_BAYTRAIL_D)
+
 #define IS_GEN7(devid)	        (IS_IVYBRIDGE(devid) || \
+				 IS_BAYTRAIL(devid) || \
 				 IS_HASWELL(devid))

 #define IS_HSW_GT1(devid)	(devid == PCI_CHIP_HASWELL_GT1 || \
 				 devid == PCI_CHIP_HASWELL_M_GT1 || \
 				 devid == PCI_CHIP_HASWELL_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_E_GT1 || \
 				 devid == PCI_CHIP_HASWELL_SDV_GT1 || \
 				 devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \
 				 devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \
 				 devid == PCI_CHIP_HASWELL_ULT_GT1 || \
 				 devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \
 				 devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_ULT_E_GT1 || \
 				 devid == PCI_CHIP_HASWELL_CRW_GT1 || \
 				 devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \
-				 devid == PCI_CHIP_HASWELL_CRW_S_GT1)
+				 devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_CRW_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_CRW_E_GT1)
 #define IS_HSW_GT2(devid)	(devid == PCI_CHIP_HASWELL_GT2 || \
 				 devid == PCI_CHIP_HASWELL_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_S_GT2 || \
+				 devid == PCI_CHIP_HASWELL_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_E_GT2 || \
 				 devid == PCI_CHIP_HASWELL_SDV_GT2 || \
 				 devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \
+				 devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \
 				 devid == PCI_CHIP_HASWELL_ULT_GT2 || \
 				 devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \
+				 devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_ULT_E_GT2 || \
 				 devid == PCI_CHIP_HASWELL_CRW_GT2 || \
 				 devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_CRW_S_GT2 || \
-				 devid == PCI_CHIP_HASWELL_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_S_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_SDV_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_SDV_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_SDV_S_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_ULT_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_ULT_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_ULT_S_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_CRW_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_CRW_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_CRW_S_GT2_PLUS)
+				 devid == PCI_CHIP_HASWELL_CRW_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_CRW_E_GT2)
+#define IS_HSW_GT3(devid)	(devid == PCI_CHIP_HASWELL_GT3 || \
+				 devid == PCI_CHIP_HASWELL_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_E_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_E_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_E_GT3)

 #define IS_HASWELL(devid)       (IS_HSW_GT1(devid) || \
-				 IS_HSW_GT2(devid))
+				 IS_HSW_GT2(devid) || \
+				 IS_HSW_GT3(devid))

 #define IS_965(devid)		(IS_GEN4(devid) || \
 				 IS_G4X(devid) || \
--- a/Show More
+++ b/Show More