docs: 9.1.4 release notes

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
mesa: Bump version to 9.1.4
2013-07-01 14:05:00 -07:00 · 2013-07-01 13:58:56 -07:00 · 2013-07-01 08:49:08 +02:00 · 2013-06-30 21:41:57 -07:00 · 2013-06-29 15:21:29 -07:00 · 2013-06-27 13:17:20 -07:00
105 changed files with 1313 additions and 534 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,7 +36,7 @@ check-local:

 # Rules for making release tarballs

-PACKAGE_VERSION=9.1.3
+PACKAGE_VERSION=9.1.4
 PACKAGE_DIR = Mesa-$(PACKAGE_VERSION)
 PACKAGE_NAME = MesaLib-$(PACKAGE_VERSION)

--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -14,3 +14,19 @@ dbf94d105a48b7aafb2c8cf64d8b4392d87efea1 glsl: Replace constant-index vector arr

 # This patch was backported as c3eb301
 a8246927e35a49097f70cffb7fa8dd05ec1365e1 r600g: Fix UMAD on Cayman
+
+# These patches cannot be backported without other, too invasive changes
+eb19163a4dd3d7bfeed63229820c926f99ed00d9 radeonsi: Initial support for multiple constant buffers
+e3befbca5ed9f22effcdc91c5886c86b644bc190 radeonsi: Handle TGSI_SEMANTIC_CLIPVERTEX
+
+# These patches are performance improvements that are difficult to backport and cause regressions
+740350c982bd2735b9eb9063c2b91856b6f1ad31 i965: Make the fragment shader pull constants index by dwords, not vec4s.
+dca5fc14358a8b267b3854c39c976a822885898f i965/fs: Improve performance of varying-index uniform loads on IVB.
+70b27e0e4b5d15e575ea477d63c0f6cb19d645c2 i965/fs: Use LD messages for pre-gen7 varying-index uniform loads
+62501c3af85089b423218a41a2e2433ac849c2d3 i965/fs: Allow CSE on pre-gen7 varying-index uniform loads
+
+# Reverted in master
+98dfd59a0445666060c97b0dccaf0e9f030b547a i965: fix problem with constant out of bounds access (v2)
+
+# Already cherry-picked, but squashed with the commit that broke what this fixed
+4405ff4055685841c9d9545da52c7edc8708b14b i965: Fix haswell_upload_cut_index when there's no index buffer.
--- a/configure.ac
+++ b/configure.ac
@@ -6,7 +6,7 @@ dnl Tell the user about autoconf.html in the --help output
 m4_divert_once([HELP_END], [
 See docs/autoconf.html for more details on the options for Mesa.])

-AC_INIT([Mesa], [9.1.3],
+AC_INIT([Mesa], [9.1.4],
    [https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa])
 AC_CONFIG_AUX_DIR([bin])
 AC_CONFIG_MACRO_DIR([m4])
@@ -1059,26 +1059,24 @@ if test "x$enable_dri" = xyes; then
    DRI_DIRS=`echo "$DRI_DIRS" | $SED 's/  */ /g'`

    # Check for expat
-    if test "x$enable_dri" = xyes; then
-        EXPAT_INCLUDES=""
-        EXPAT_LIB=-lexpat
-        AC_ARG_WITH([expat],
-            [AS_HELP_STRING([--with-expat=DIR],
-                [expat install directory])],[
-            EXPAT_INCLUDES="-I$withval/include"
-            CPPFLAGS="$CPPFLAGS $EXPAT_INCLUDES"
-            LDFLAGS="$LDFLAGS -L$withval/$LIB_DIR"
-            EXPAT_LIB="-L$withval/$LIB_DIR -lexpat"
-            ])
-        AC_CHECK_HEADER([expat.h],[],[AC_MSG_ERROR([Expat required for DRI.])])
-	save_LIBS="$LIBS"
-        AC_CHECK_LIB([expat],[XML_ParserCreate],[],
-            [AC_MSG_ERROR([Expat required for DRI.])])
-	LIBS="$save_LIBS"
-    fi
+    EXPAT_INCLUDES=""
+    EXPAT_LIB=-lexpat
+    AC_ARG_WITH([expat],
+        [AS_HELP_STRING([--with-expat=DIR],
+            [expat install directory])],[
+        EXPAT_INCLUDES="-I$withval/include"
+        CPPFLAGS="$CPPFLAGS $EXPAT_INCLUDES"
+        LDFLAGS="$LDFLAGS -L$withval/$LIB_DIR"
+        EXPAT_LIB="-L$withval/$LIB_DIR -lexpat"
+        ])
+    AC_CHECK_HEADER([expat.h],[],[AC_MSG_ERROR([Expat required for DRI.])])
+    save_LIBS="$LIBS"
+    AC_CHECK_LIB([expat],[XML_ParserCreate],[],
+        [AC_MSG_ERROR([Expat required for DRI.])])
+    LIBS="$save_LIBS"

-    # if we are building any dri driver other than swrast or using the dri state tracker ...
-    if test -n "$DRI_DIRS" -a x"$DRI_DIRS" != xswrast || test "x$enable_dri" = xyes; then
+    # If we are building any DRI driver other than swrast ...
+    if test -n "$DRI_DIRS" -a x"$DRI_DIRS" != xswrast; then
        # ... libdrm is required
        if test "x$have_libdrm" != xyes; then
            AC_MSG_ERROR([DRI drivers requires libdrm >= $LIBDRM_REQUIRED])
@@ -1146,14 +1144,6 @@ case $DRI_DIRS in
    ;;
 esac

-AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
-AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes)
-AM_CONDITIONAL(HAVE_NOUVEAU_DRI, test x$HAVE_NOUVEAU_DRI = xyes)
-AM_CONDITIONAL(HAVE_R200_DRI, test x$HAVE_R200_DRI = xyes)
-AM_CONDITIONAL(HAVE_RADEON_DRI, test x$HAVE_RADEON_DRI = xyes)
-AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
-AM_CONDITIONAL(HAVE_COMMON_DRI, test x$HAVE_COMMON_DRI = xyes)
-
 dnl
 dnl OSMesa configuration
 dnl
@@ -1752,6 +1742,7 @@ gallium_check_st() {
    fi
    if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then
         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $2"
+         HAVE_COMMON_DRI=yes
    fi
    if test "x$HAVE_ST_XORG" = xyes && test "x$3" != x; then
         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $3"
@@ -1877,6 +1868,7 @@ if test "x$with_gallium_drivers" != x; then

            if test "x$HAVE_ST_DRI" = xyes; then
                GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS dri-swrast"
+                HAVE_COMMON_DRI=yes
            fi
            if test "x$HAVE_ST_VDPAU" = xyes; then
                GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS vdpau-softpipe"
@@ -1993,6 +1985,14 @@ for driver in $GALLIUM_DRIVERS_DIRS; do
    esac
 done

+AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
+AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes)
+AM_CONDITIONAL(HAVE_NOUVEAU_DRI, test x$HAVE_NOUVEAU_DRI = xyes)
+AM_CONDITIONAL(HAVE_R200_DRI, test x$HAVE_R200_DRI = xyes)
+AM_CONDITIONAL(HAVE_RADEON_DRI, test x$HAVE_RADEON_DRI = xyes)
+AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
+AM_CONDITIONAL(HAVE_COMMON_DRI, test x$HAVE_COMMON_DRI = xyes)
+
 AM_CONDITIONAL(HAVE_GALAHAD_GALLIUM, test x$HAVE_GALAHAD_GALLIUM = xyes)
 AM_CONDITIONAL(HAVE_IDENTITY_GALLIUM, test x$HAVE_IDENTITY_GALLIUM = xyes)
 AM_CONDITIONAL(HAVE_NOOP_GALLIUM, test x$HAVE_NOOP_GALLIUM = xyes)
--- a/docs/relnotes-9.1.3.html
+++ b/docs/relnotes-9.1.3.html
@@ -30,7 +30,9 @@ because GL_ARB_compatibility is not supported.

 <h2>MD5 checksums</h2>
 <pre>
-TBD
+952ccd03547ed72333b64e1746cf8ada  MesaLib-9.1.3.tar.bz2
+26d2f1aa8e9db388d51fcbd163c61fb7  MesaLib-9.1.3.tar.gz
+7017b7bdf0ebfd39a5c46cee7cf6b567  MesaLib-9.1.3.zip
 </pre>

 <h2>New features</h2>
--- a/docs/relnotes-9.1.4.html
+++ b/docs/relnotes-9.1.4.html
@@ -0,0 +1,319 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 9.1.4 Release Notes / July 1st, 2013</h1>
+
+<p>
+Mesa 9.1.4 is a bug fix release which fixes bugs found since the 9.1.3 release.
+</p>
+<p>
+Mesa 9.1 implements the OpenGL 3.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.1.  OpenGL
+3.1 is <strong>only</strong> available if requested at context creation
+because GL_ARB_compatibility is not supported.
+</p>
+
+<h2>MD5 checksums</h2>
+<pre>
+TBD
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37871">Bug 37871</a> - [bisected i965] Bus error (core dumped) on oglc texdecaltile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=42182">Bug 42182</a> - egl/opengles1/tri_x11 renders wrong</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44958">Bug 44958</a> - [SNB IVB HSW] mesa demo test texleak bus error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=53494">Bug 53494</a> - [snb] crash in texsubimage to a large atlas in clutter</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=60518">Bug 60518</a> - glDrawElements segfault when compiled into display list</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61821">Bug 61821</a> - src/mesa/drivers/dri/common/xmlpool.h:96:29: fatal error: xmlpool/options.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63520">Bug 63520</a> - r300g regression (RV380): Strange rendering of light sources in Penumbra  (bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63701">Bug 63701</a> - [HSW] support new haswell graphics [8086:0a2e]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64727">Bug 64727</a> - [gm45, bisected] some piglit glsl 1.10 built-in-functions tests crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64745">Bug 64745</a> - [llvmpipe] SIGSEGV src/gallium/state_trackers/glx/xlib/glx_api.c:1374</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64934">Bug 64934</a> - [llvmpipe] SIGSEGV src/gallium/state_trackers/glx/xlib/glx_api.c:1363</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65173">Bug 65173</a> - segfault in _mesa_get_format_datatype and _mesa_get_color_read_type when state dumping with glretrace</li>
+
+</ul>
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-9.1.3..mesa-9.1.4
+</pre>
+
+<p>Alan Coopersmith (2):</p>
+<ul>
+  <li>integer overflow in XF86DRIOpenConnection() [CVE-2013-1993 1/2]</li>
+  <li>integer overflow in XF86DRIGetClientDriverName() [CVE-2013-1993 2/2]</li>
+</ul>
+
+<p>Alex Deucher (3):</p>
+<ul>
+  <li>radeonsi: add support for hainan chips</li>
+  <li>radeonsi: add Hainan pci ids</li>
+  <li>winsys/radeon: add env var to disable VM on Cayman/Trinity</li>
+</ul>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>glapi: Add some missing static_dispatch="false" annotations to es_EXT.xml</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>intel: Add a null pointer check before dereferencing the pointer</li>
+</ul>
+
+<p>Armin K (1):</p>
+<ul>
+  <li>gallivm: Fix build with LLVM 3.3</li>
+</ul>
+
+<p>Brian Paul (9):</p>
+<ul>
+  <li>mesa: fix the compressed TexSubImage size checking code</li>
+  <li>st/mesa: generate GL_OUT_OF_MEMORY if we can't create the index buffer</li>
+  <li>mesa: fix error checking of DXT sRGB formats in _mesa_base_tex_format()</li>
+  <li>st/glx/xlib: check for null ctx pointer in glXIsDirect()</li>
+  <li>xlib: check for null ctx pointer in glXIsDirect()</li>
+  <li>st/glx: add null ctx check in glXDestroyContext()</li>
+  <li>xlib: add null ctx check in glXDestroyContext()</li>
+  <li>meta: move vertex array enables for mipmap generation</li>
+  <li>mesa: handle missing read buffer in _mesa_get_color_read_format/type()</li>
+</ul>
+
+<p>Bryan Cain (1):</p>
+<ul>
+  <li>nv50: initialize kick_notify callback in nv50_create</li>
+</ul>
+
+<p>Chad Versace (3):</p>
+<ul>
+  <li>egl/android: Fix error condition for EGL_ANDROID_image_native_buffer</li>
+  <li>i965: Fix glColorPointer(GL_FIXED)</li>
+  <li>intel: Return early if miptree allocation fails</li>
+</ul>
+
+<p>Chia-I Wu (1):</p>
+<ul>
+  <li>u_vbuf: fix index buffer leak</li>
+</ul>
+
+<p>Chris Forbes (8):</p>
+<ul>
+  <li>mesa: add accessor for effective stencil ref</li>
+  <li>intel: Use accessor for stencil reference values</li>
+  <li>nouveau: Use accessor for stencil reference values</li>
+  <li>radeon: Use accessor for stencil reference values</li>
+  <li>st: Use accessor for stencil reference values</li>
+  <li>swrast: Use accessor for stencil reference values</li>
+  <li>mesa: Stop clamping stencil reference value at specification time</li>
+  <li>mesa: Use accessor for stencil reference values in glGet</li>
+</ul>
+
+<p>Chí-Thanh Christopher Nguyễn (1):</p>
+<ul>
+  <li>targets/dri-i915: Force c++ linker in all cases</li>
+</ul>
+
+<p>Daniel Martin (1):</p>
+<ul>
+  <li>Fix build of swrast only without libdrm</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>i965: fix problem with constant out of bounds access (v3)</li>
+</ul>
+
+<p>Eric Anholt (10):</p>
+<ul>
+  <li>mesa: Make core Mesa allocate the texture renderbuffer wrapper.</li>
+  <li>mesa: Make gl_renderbuffers backed by EGL images use FinishRenderTexture.</li>
+  <li>i965/fs: Bake regs_written into the IR instead of recomputing it later.</li>
+  <li>i965/vs: Fix implied_mrf_writes() for integer division pre-gen6.</li>
+  <li>intel: Add support for writing to our linear-temporary-CPU-map case.</li>
+  <li>intel: Do temporary CPU maps of textures that are too big to GTT map.</li>
+  <li>intel: Avoid making tiled miptrees we won't be able to blit.</li>
+  <li>intel: Fix MRT handling of glBitmap().</li>
+  <li>intel: Fix format handling of blit glBitmap()</li>
+  <li>i965: Shut up the last release build warning.</li>
+</ul>
+
+<p>Fabian Bieler (2):</p>
+<ul>
+  <li>mesa/st: Don't copy propagate from swizzles.</li>
+  <li>mesa/program: Don't copy propagate from swizzles.</li>
+</ul>
+
+<p>Frank Henigman (1):</p>
+<ul>
+  <li>intel: initialize fs_visitor::params_remap in constructor</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>docs: Add 9.1.3 release md5sums</li>
+  <li>mesa: Bump version to 9.1.4</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>scons: Fix implicit python dependency discovery on Windows.</li>
+</ul>
+
+<p>Kenneth Graunke (17):</p>
+<ul>
+  <li>mesa: Add i965 varying index patches to .cherry-ignore.</li>
+  <li>i965: Turn brw-&gt;urb.vs_size and gs_size into local variables.</li>
+  <li>i965: Use a variable for the push constant size in kB.</li>
+  <li>i965: Update URB partitioning code for Haswell's GT3 variant.</li>
+  <li>i965: Add chipset limits for the Haswell GT3 variant.</li>
+  <li>i965: Enable the Bay Trail platform.</li>
+  <li>mesa: Add a reverted commit to cherry-ignore.</li>
+  <li>vbo: Ignore PRIMITIVE_RESTART_FIXED_INDEX for glDrawArrays().</li>
+  <li>mesa: Add a helper function for determining the restart index.</li>
+  <li>vbo: Use the new primitive restart index helper function.</li>
+  <li>i965: Use the correct restart index for fixed index mode on Haswell.</li>
+  <li>mesa: Cherry-ignore a patch that got picked but squashed.</li>
+  <li>i965: Fix can_cut_index_handle_restart_index() for byte/short types.</li>
+  <li>st/mesa: Go back to using ctx-&gt;Array.RestartIndex, not _RestartIndex.</li>
+  <li>mesa: Ignore fixed-index primitive restart in ArrayElement().</li>
+  <li>mesa: Delete the ctx-&gt;Array._RestartIndex derived state.</li>
+  <li>glsl: Bail on parsing if the #version directive is bogus.</li>
+</ul>
+
+<p>Lauri Kasanen (1):</p>
+<ul>
+  <li>r600g: Correctly initialize the shader key, v2</li>
+</ul>
+
+<p>Maarten Lankhorst (4):</p>
+<ul>
+  <li>nvc0: fix up video buffer alignment requirements</li>
+  <li>nvc0: kill assert in ppp code</li>
+  <li>nvc0: set rsvd_kick correctly</li>
+  <li>nvc0: allow frame dropping in h264</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+  <li>radeonsi: increase array size for shader inputs and outputs</li>
+  <li>vbo: fix possible use-after-free segfault after a VAO is deleted</li>
+  <li>glsl: fix the value of gl_MaxFragmentUniformVectors</li>
+  <li>st/mesa: initialize all program constants and UBO limits</li>
+  <li>st/mesa: initialize Const.MaxColorAttachments</li>
+  <li>st/mesa: fix a couple of issues in st_bind_ubos</li>
+  <li>mesa: declare UniformBufferBindings as an array with a static size</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>configure.ac: Remove redundant checks of enable_dri.</li>
+  <li>configure.ac: Build dricommon for DRI gallium drivers</li>
+  <li>i965: NULL check depth_mt to quiet static analysis.</li>
+</ul>
+
+<p>Michel Dänzer (3):</p>
+<ul>
+  <li>radeonsi: Fix handling of TGSI_SEMANTIC_PSIZE</li>
+  <li>radeonsi: Fix user clip planes</li>
+  <li>mesa: Note that two radeonsi fixes cannot be backported after all</li>
+</ul>
+
+<p>Mike Stroyan (1):</p>
+<ul>
+  <li>configure.ac: Build dricommon for gallium swrast</li>
+</ul>
+
+<p>Naohiro Aota (1):</p>
+<ul>
+  <li>xmlpool/build: Make sure to set mo properly</li>
+</ul>
+
+<p>Paul Berry (2):</p>
+<ul>
+  <li>glsl: Fix error checking on "flat" keyword to match GLSL ES 3.00, GLSL 1.50.</li>
+  <li>i965/gen7.5: Allow HW primitive restart for all primitive types.</li>
+</ul>
+
+<p>Paulo Zanoni (1):</p>
+<ul>
+  <li>i965: make GT3 machines work as GT3 instead of GT2</li>
+</ul>
+
+<p>Rodrigo Vivi (2):</p>
+<ul>
+  <li>i965: Add missing Haswell GT3 Desktop to IS_HSW_GT3 check.</li>
+  <li>i965: Adding more reserved PCI IDs for Haswell.</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>gallivm: fix out-of-bounds access with mirror_clamp_to_edge address mode</li>
+</ul>
+
+<p>Stéphane Marchesin (2):</p>
+<ul>
+  <li>st/xlib: Fix upside down coordinates for CopySubBuffer</li>
+  <li>st/xlib: Flush the front buffer before doing CopySubBuffer</li>
+</ul>
+
+<p>Sven Joachim (1):</p>
+<ul>
+  <li>mesa: Fix ieee fp on Alpha</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: fix type comparison errors in sub-texture error checking code</li>
+</ul>
+
+<p>Tom Stellard (2):</p>
+<ul>
+  <li>gallivm: Fix build with LLVM &gt;= r180063</li>
+  <li>r300g/compiler: Prevent regalloc from swizzling texture operands v2</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>radeon: Initialize variables in radeon_llvm_context_init.</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -28,37 +28,66 @@ CHIPSET(0x015a, IVYBRIDGE_S_GT1, ivb_gt1)
 CHIPSET(0x016a, IVYBRIDGE_S_GT2, ivb_gt2)
 CHIPSET(0x0402, HASWELL_GT1, hsw_gt1)
 CHIPSET(0x0412, HASWELL_GT2, hsw_gt2)
-CHIPSET(0x0422, HASWELL_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0422, HASWELL_GT3, hsw_gt3)
 CHIPSET(0x0406, HASWELL_M_GT1, hsw_gt1)
 CHIPSET(0x0416, HASWELL_M_GT2, hsw_gt2)
-CHIPSET(0x0426, HASWELL_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0426, HASWELL_M_GT3, hsw_gt3)
 CHIPSET(0x040A, HASWELL_S_GT1, hsw_gt1)
 CHIPSET(0x041A, HASWELL_S_GT2, hsw_gt2)
-CHIPSET(0x042A, HASWELL_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x042A, HASWELL_S_GT3, hsw_gt3)
+CHIPSET(0x040B, HASWELL_B_GT1, hsw_gt1)
+CHIPSET(0x041B, HASWELL_B_GT2, hsw_gt2)
+CHIPSET(0x042B, HASWELL_B_GT3, hsw_gt3)
+CHIPSET(0x040E, HASWELL_E_GT1, hsw_gt1)
+CHIPSET(0x041E, HASWELL_E_GT2, hsw_gt2)
+CHIPSET(0x042E, HASWELL_E_GT3, hsw_gt3)
 CHIPSET(0x0C02, HASWELL_SDV_GT1, hsw_gt1)
 CHIPSET(0x0C12, HASWELL_SDV_GT2, hsw_gt2)
-CHIPSET(0x0C22, HASWELL_SDV_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0C22, HASWELL_SDV_GT3, hsw_gt3)
 CHIPSET(0x0C06, HASWELL_SDV_M_GT1, hsw_gt1)
 CHIPSET(0x0C16, HASWELL_SDV_M_GT2, hsw_gt2)
-CHIPSET(0x0C26, HASWELL_SDV_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0C26, HASWELL_SDV_M_GT3, hsw_gt3)
 CHIPSET(0x0C0A, HASWELL_SDV_S_GT1, hsw_gt1)
 CHIPSET(0x0C1A, HASWELL_SDV_S_GT2, hsw_gt2)
-CHIPSET(0x0C2A, HASWELL_SDV_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0C2A, HASWELL_SDV_S_GT3, hsw_gt3)
+CHIPSET(0x0C0B, HASWELL_SDV_B_GT1, hsw_gt1)
+CHIPSET(0x0C1B, HASWELL_SDV_B_GT2, hsw_gt2)
+CHIPSET(0x0C2B, HASWELL_SDV_B_GT3, hsw_gt3)
+CHIPSET(0x0C0E, HASWELL_SDV_E_GT1, hsw_gt1)
+CHIPSET(0x0C1E, HASWELL_SDV_E_GT2, hsw_gt2)
+CHIPSET(0x0C2E, HASWELL_SDV_E_GT3, hsw_gt3)
 CHIPSET(0x0A02, HASWELL_ULT_GT1, hsw_gt1)
 CHIPSET(0x0A12, HASWELL_ULT_GT2, hsw_gt2)
-CHIPSET(0x0A22, HASWELL_ULT_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0A22, HASWELL_ULT_GT3, hsw_gt3)
 CHIPSET(0x0A06, HASWELL_ULT_M_GT1, hsw_gt1)
 CHIPSET(0x0A16, HASWELL_ULT_M_GT2, hsw_gt2)
-CHIPSET(0x0A26, HASWELL_ULT_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0A26, HASWELL_ULT_M_GT3, hsw_gt3)
 CHIPSET(0x0A0A, HASWELL_ULT_S_GT1, hsw_gt1)
 CHIPSET(0x0A1A, HASWELL_ULT_S_GT2, hsw_gt2)
-CHIPSET(0x0A2A, HASWELL_ULT_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0A2A, HASWELL_ULT_S_GT3, hsw_gt3)
+CHIPSET(0x0A0B, HASWELL_ULT_B_GT1, hsw_gt1)
+CHIPSET(0x0A1B, HASWELL_ULT_B_GT2, hsw_gt2)
+CHIPSET(0x0A2B, HASWELL_ULT_B_GT3, hsw_gt3)
+CHIPSET(0x0A0E, HASWELL_ULT_E_GT1, hsw_gt1)
+CHIPSET(0x0A1E, HASWELL_ULT_E_GT2, hsw_gt2)
+CHIPSET(0x0A2E, HASWELL_ULT_E_GT3, hsw_gt3)
 CHIPSET(0x0D02, HASWELL_CRW_GT1, hsw_gt1)
 CHIPSET(0x0D12, HASWELL_CRW_GT2, hsw_gt2)
-CHIPSET(0x0D22, HASWELL_CRW_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D22, HASWELL_CRW_GT3, hsw_gt3)
 CHIPSET(0x0D06, HASWELL_CRW_M_GT1, hsw_gt1)
 CHIPSET(0x0D16, HASWELL_CRW_M_GT2, hsw_gt2)
-CHIPSET(0x0D26, HASWELL_CRW_M_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D26, HASWELL_CRW_M_GT3, hsw_gt3)
 CHIPSET(0x0D0A, HASWELL_CRW_S_GT1, hsw_gt1)
 CHIPSET(0x0D1A, HASWELL_CRW_S_GT2, hsw_gt2)
-CHIPSET(0x0D2A, HASWELL_CRW_S_GT2_PLUS, hsw_gt2)
+CHIPSET(0x0D2A, HASWELL_CRW_S_GT3, hsw_gt3)
+CHIPSET(0x0D0B, HASWELL_CRW_B_GT1, hsw_gt1)
+CHIPSET(0x0D1B, HASWELL_CRW_B_GT2, hsw_gt2)
+CHIPSET(0x0D2B, HASWELL_CRW_B_GT3, hsw_gt3)
+CHIPSET(0x0D0E, HASWELL_CRW_E_GT1, hsw_gt1)
+CHIPSET(0x0D1E, HASWELL_CRW_E_GT2, hsw_gt2)
+CHIPSET(0x0D2E, HASWELL_CRW_E_GT3, hsw_gt3)
+CHIPSET(0x0F31, BAYTRAIL_M_1, byt)
+CHIPSET(0x0F32, BAYTRAIL_M_2, byt)
+CHIPSET(0x0F33, BAYTRAIL_M_3, byt)
+CHIPSET(0x0157, BAYTRAIL_M_4, byt)
+CHIPSET(0x0155, BAYTRAIL_D, byt)
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -63,3 +63,10 @@ CHIPSET(0x6620, OLAND_6620, OLAND)
 CHIPSET(0x6621, OLAND_6621, OLAND)
 CHIPSET(0x6623, OLAND_6623, OLAND)
 CHIPSET(0x6631, OLAND_6631, OLAND)
+
+CHIPSET(0x6660, HAINAN_6660, HAINAN)
+CHIPSET(0x6663, HAINAN_6663, HAINAN)
+CHIPSET(0x6664, HAINAN_6664, HAINAN)
+CHIPSET(0x6665, HAINAN_6665, HAINAN)
+CHIPSET(0x6667, HAINAN_6667, HAINAN)
+CHIPSET(0x666F, HAINAN_666F, HAINAN)
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -95,7 +95,7 @@ def createConvenienceLibBuilder(env):

 # TODO: handle import statements with multiple modules
 # TODO: handle from import statements
-import_re = re.compile(r'^import\s+(\S+)$', re.M)
+import_re = re.compile(r'^\s*import\s+(\S+)\s*$', re.M)

 def python_scan(node, env, path):
    # http://www.scons.org/doc/0.98.5/HTML/scons-user/c2781.html#AEN2789
@@ -113,6 +113,7 @@ def python_scan(node, env, path):
            if os.path.exists(file):
                results.append(env.File(file))
                break
+    #print node, map(str, results)
    return results

 python_scanner = SCons.Scanner.Scanner(function = python_scan, skeys = ['.py'])
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -338,7 +338,7 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
 }

 static _EGLImage *
-dri2_create_image_android_native_buffer(_EGLDisplay *disp,
+dri2_create_image_android_native_buffer(_EGLDisplay *disp, _EGLContext *ctx,
                                        struct ANativeWindowBuffer *buf)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
@@ -346,6 +346,18 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp,
   int name;
   EGLint format;

+   if (ctx != NULL) {
+      /* From the EGL_ANDROID_image_native_buffer spec:
+       *
+       *     * If <target> is EGL_NATIVE_BUFFER_ANDROID and <ctx> is not
+       *       EGL_NO_CONTEXT, the error EGL_BAD_CONTEXT is generated.
+       */
+      _eglError(EGL_BAD_CONTEXT, "eglCreateEGLImageKHR: for "
+                "EGL_NATIVE_BUFFER_ANDROID, the context must be "
+                "EGL_NO_CONTEXT");
+      return NULL;
+   }
+
   if (!buf || buf->common.magic != ANDROID_NATIVE_BUFFER_MAGIC ||
       buf->common.version != sizeof(*buf)) {
      _eglError(EGL_BAD_PARAMETER, "eglCreateEGLImageKHR");
@@ -417,7 +429,7 @@ droid_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
 {
   switch (target) {
   case EGL_NATIVE_BUFFER_ANDROID:
-      return dri2_create_image_android_native_buffer(disp,
+      return dri2_create_image_android_native_buffer(disp, ctx,
            (struct ANativeWindowBuffer *) buffer);
   default:
      return dri2_create_image_khr(drv, disp, ctx, target, buffer, attr_list);
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -55,6 +55,10 @@
 #include <llvm/MC/MCRegisterInfo.h>
 #endif /* HAVE_LLVM >= 0x0301 */

+#if HAVE_LLVM >= 0x0303
+#include <llvm/ADT/OwningPtr.h>
+#endif
+
 #include "util/u_math.h"
 #include "util/u_debug.h"

--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -60,6 +60,12 @@
 #include <llvm/Target/TargetSelect.h>
 #endif /* HAVE_LLVM < 0x0300 */

+#if HAVE_LLVM >= 0x0303
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Module.h>
+#include <llvm/Support/CBindingWrapping.h>
+#endif
+
 #include "pipe/p_config.h"
 #include "util/u_debug.h"
 #include "util/u_cpu_detect.h"
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -406,7 +406,6 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      {
-         LLVMValueRef min, max;
         struct lp_build_context abs_coord_bld = bld->coord_bld;
         abs_coord_bld.type.sign = FALSE;
         coord = lp_build_abs(coord_bld, coord);
@@ -416,16 +415,18 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
            coord = lp_build_mul(coord_bld, coord, length_f);
         }

-         /* clamp to [0.5, length - 0.5] */
-         min = half;
-         max = lp_build_sub(coord_bld, length_f, min);
-         coord = lp_build_clamp(coord_bld, coord, min, max);
-
+         /* clamp to length max */
+         coord = lp_build_min(coord_bld, coord, length_f);
+         /* subtract 0.5 */
         coord = lp_build_sub(coord_bld, coord, half);
+         /* clamp to [0, length - 0.5] */
+         coord = lp_build_max(coord_bld, coord, coord_bld->zero);

         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
+         /* coord1 = min(coord1, length-1) */
+         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
      }
      break;

--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -307,6 +307,9 @@ void u_vbuf_destroy(struct u_vbuf *mgr)
   unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                              PIPE_SHADER_CAP_MAX_INPUTS);

+   mgr->pipe->set_index_buffer(mgr->pipe, NULL);
+   pipe_resource_reference(&mgr->index_buffer.buffer, NULL);
+
   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, num_vb, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -242,6 +242,7 @@ nv50_create(struct pipe_screen *pscreen, void *priv)
      screen->cur_ctx = nv50;
      nouveau_pushbuf_bufctx(screen->base.pushbuf, nv50->bufctx);
   }
+   nv50->base.pushbuf->kick_notify = nv50_default_kick_notify;

   nv50_init_query_functions(nv50);
   nv50_init_surface_functions(nv50);
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -445,6 +445,7 @@ nvc0_screen_create(struct nouveau_device *dev)
   chan = screen->base.channel;
   push = screen->base.pushbuf;
   push->user_priv = screen;
+   push->rsvd_kick = 5;

   screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
      PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
--- a/src/gallium/drivers/nvc0/nvc0_video.c
+++ b/src/gallium/drivers/nvc0/nvc0_video.c
@@ -558,8 +558,6 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
   buffer = CALLOC_STRUCT(nvc0_video_buffer);
   if (!buffer)
      return NULL;
-   assert(!(templat->height % 4));
-   assert(!(templat->width % 2));

   buffer->base.buffer_format = templat->buffer_format;
   buffer->base.context = pipe;
@@ -578,7 +576,7 @@ nvc0_video_buffer_create(struct pipe_context *pipe,
   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
   templ.format = PIPE_FORMAT_R8_UNORM;
   templ.width0 = buffer->base.width;
-   templ.height0 = buffer->base.height/2;
+   templ.height0 = (buffer->base.height + 1)/2;
   templ.flags = NVC0_RESOURCE_FLAG_VIDEO;
   templ.last_level = 0;
   templ.array_size = 1;
@@ -589,8 +587,8 @@ nvc0_video_buffer_create(struct pipe_context *pipe,

   templ.format = PIPE_FORMAT_R8G8_UNORM;
   buffer->num_planes = 2;
-   templ.width0 /= 2;
-   templ.height0 /= 2;
+   templ.width0 = (templ.width0 + 1) / 2;
+   templ.height0 = (templ.height0 + 1) / 2;
   for (i = 1; i < buffer->num_planes; ++i) {
      buffer->resources[i] = pipe->screen->resource_create(pipe->screen, &templ);
      if (!buffer->resources[i])
--- a/src/gallium/drivers/nvc0/nvc0_video_ppp.c
+++ b/src/gallium/drivers/nvc0/nvc0_video_ppp.c
@@ -62,8 +62,6 @@ nvc0_decoder_setup_ppp(struct nvc0_decoder *dec, struct nvc0_video_buffer *targe
   PUSH_DATA (push, in_addr + y2); // 70c
   PUSH_DATA (push, in_addr + cbcr); // 710
   PUSH_DATA (push, in_addr + cbcr2); // 714
-   assert(target->resources[0]->width0 >= 16 * dec_w);
-   assert(target->resources[0]->height0 >= dec->base.height/2);

   for (i = 0; i < 2; ++i) {
      struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
--- a/src/gallium/drivers/nvc0/nvc0_video_vp.c
+++ b/src/gallium/drivers/nvc0/nvc0_video_vp.c
@@ -185,8 +185,6 @@ nvc0_decoder_handle_references(struct nvc0_decoder *dec, struct nvc0_video_buffe
              (!dec->refs[idx].decoded_bottom || !dec->refs[idx].decoded_top)));
      if (target == refs[i])
         empty_spot = 0;
-      assert(!h264 ||
-             dec->refs[idx].last_used == seq - 1);

      if (dec->refs[idx].vidbuf != refs[i]) {
         debug_printf("%p is not a real ref\n", refs[i]);
@@ -338,7 +336,6 @@ nvc0_decoder_fill_picparm_h264_vp(struct nvc0_decoder *dec,
   unsigned ring, i, j = 0;
   assert(offsetof(struct h264_picparm_vp, u224) == 0x224);
   *is_ref = d->is_reference;
-   assert(!d->frame_num || dec->last_frame_num + 1 == d->frame_num || dec->last_frame_num == d->frame_num);
   dec->last_frame_num = d->frame_num;

   h->width = mb(dec->base.width);
--- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -383,6 +383,14 @@ static enum rc_reg_class variable_get_class(
 							RC_INSTRUCTION_PAIR ) {
 						old_swizzle = r.U.P.Arg->Swizzle;
 					} else {
+						/* Source operands of TEX
+						 * instructions can't be
+						 * swizzled on r300/r400 GPUs.
+						 */
+						if (!variable->C->is_r500) {
+							can_change_writemask = 0;
+							break;
+						}
 						old_swizzle = r.U.I.Src->Swizzle;
 					}
 					new_swizzle = rc_adjust_channels(
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -720,6 +720,7 @@ static int r600_shader_select(struct pipe_context *ctx,
 	struct r600_pipe_shader * shader = NULL;
 	int r;

+	memset(&key, 0, sizeof(key));
 	key = r600_shader_selector_key(ctx, sel);

 	/* Check if we don't need to change anything.
@@ -766,7 +767,7 @@ static int r600_shader_select(struct pipe_context *ctx,
 			key = r600_shader_selector_key(ctx, sel);
 		}

-		shader->key = key;
+		memcpy(&shader->key, &key, sizeof(key));
 		sel->num_shaders++;
 	}

--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1126,7 +1126,9 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	/* XXX: We need to revisit this.I think the correct way to do this is
 	 * to use length = 4 here and use the elem_bld for everything. */
 	type.floating = TRUE;
+	type.fixed = FALSE;
 	type.sign = TRUE;
+	type.norm = FALSE;
 	type.width = 32;
 	type.length = 1;

--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -281,6 +281,7 @@ static const char *r600_get_family_name(enum radeon_family family)
 	case CHIP_PITCAIRN: return "AMD PITCAIRN";
 	case CHIP_VERDE: return "AMD CAPE VERDE";
 	case CHIP_OLAND: return "AMD OLAND";
+	case CHIP_HAINAN: return "AMD HAINAN";
 	default: return "AMD unknown";
 	}
 }
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -126,7 +126,6 @@ struct r600_context {
 	unsigned			pa_sc_line_stipple;
 	unsigned			pa_su_sc_mode_cntl;
 	unsigned			pa_cl_clip_cntl;
-	unsigned			pa_cl_vs_out_cntl;
 	/* for saving when using blitter */
 	struct pipe_stencil_ref		stencil_ref;
 	struct si_pipe_shader_selector	*ps_shader;
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -35,6 +35,7 @@
 #include "gallivm/lp_bld_tgsi.h"
 #include "radeon_llvm.h"
 #include "radeon_llvm_emit.h"
+#include "util/u_memory.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -626,6 +627,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 		switch (d->Declaration.File) {
 		case TGSI_FILE_INPUT:
 			i = shader->ninput++;
+			assert(i < Elements(shader->input));
 			shader->input[i].name = d->Semantic.Name;
 			shader->input[i].sid = d->Semantic.Index;
 			shader->input[i].interpolate = d->Interp.Interpolate;
@@ -634,6 +636,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)

 		case TGSI_FILE_OUTPUT:
 			i = shader->noutput++;
+			assert(i < Elements(shader->output));
 			shader->output[i].name = d->Semantic.Name;
 			shader->output[i].sid = d->Semantic.Index;
 			shader->output[i].interpolate = d->Interp.Interpolate;
@@ -647,7 +650,9 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
 			/* Select the correct target */
 			switch(d->Semantic.Name) {
 			case TGSI_SEMANTIC_PSIZE:
-				target = V_008DFC_SQ_EXP_POS;
+				shader->vs_out_misc_write = 1;
+				shader->vs_out_point_size = 1;
+				target = V_008DFC_SQ_EXP_POS + 1;
 				break;
 			case TGSI_SEMANTIC_POSITION:
 				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -66,14 +66,16 @@ struct si_pipe_shader_selector {

 struct si_shader {
 	unsigned		ninput;
-	struct si_shader_io	input[32];
+	struct si_shader_io	input[40];

 	unsigned		noutput;
-	struct si_shader_io	output[32];
+	struct si_shader_io	output[40];

 	unsigned		ninterp;
 	bool			uses_kill;
 	bool			fs_write_all;
+	bool			vs_out_misc_write;
+	bool			vs_out_point_size;
 	unsigned		nr_cbufs;
 };

--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -383,6 +383,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 	}

 	rs->two_side = state->light_twoside;
+	rs->clip_plane_enable = state->clip_plane_enable;

 	polygon_dual_mode = (state->fill_front != PIPE_POLYGON_MODE_FILL ||
 				state->fill_back != PIPE_POLYGON_MODE_FILL);
@@ -411,9 +412,6 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 		S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) |
 		S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) |
 		S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);
-	rs->pa_cl_vs_out_cntl =
-		S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) |
-		S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex);

 	clip_rule = state->scissor ? 0xAAAA : 0xFFFF;

@@ -484,7 +482,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
 	rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple;
 	rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl;
 	rctx->pa_cl_clip_cntl = rs->pa_cl_clip_cntl;
-	rctx->pa_cl_vs_out_cntl = rs->pa_cl_vs_out_cntl;

 	si_pm4_bind_state(rctx, rasterizer, rs);
 	si_update_fb_rs_state(rctx);
@@ -2738,6 +2735,9 @@ void si_init_config(struct r600_context *rctx)
 	case CHIP_OLAND:
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082);
 		break;
+	case CHIP_HAINAN:
+		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
+		break;
 	default:
 		si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000);
 		break;
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -49,6 +49,7 @@ struct si_state_rasterizer {
 	unsigned		pa_su_sc_mode_cntl;
 	unsigned		pa_cl_clip_cntl;
 	unsigned		pa_cl_vs_out_cntl;
+	unsigned		clip_plane_enable;
 	float			offset_units;
 	float			offset_scale;
 };
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -55,8 +55,13 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s
 	 * takes care of adding a dummy export.
 	 */
 	for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) {
-		if (shader->shader.output[i].name != TGSI_SEMANTIC_POSITION)
+		switch (shader->shader.output[i].name) {
+		case TGSI_SEMANTIC_POSITION:
+		case TGSI_SEMANTIC_PSIZE:
+			break;
+		default:
 			nparams++;
+		}
 	}
 	if (nparams < 1)
 		nparams = 1;
@@ -66,7 +71,9 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s

 	si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT,
 		       S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
-		       S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
+		       S_02870C_POS1_EXPORT_FORMAT(shader->shader.vs_out_misc_write ?
+						   V_02870C_SPI_SHADER_4COMP :
+						   V_02870C_SPI_SHADER_NONE) |
 		       S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) |
 		       S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE));

@@ -258,6 +265,7 @@ static bool si_update_draw_info_state(struct r600_context *rctx,
 			       const struct pipe_draw_info *info)
 {
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	struct si_shader *vs = &rctx->vs_shader->current->shader;
 	unsigned prim = si_conv_pipe_prim(info->mode);
 	unsigned ls_mask = 0;

@@ -296,13 +304,14 @@ static bool si_update_draw_info_state(struct r600_context *rctx,
 		si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, rctx->pa_su_sc_mode_cntl);
        }
 	si_pm4_set_reg(pm4, R_02881C_PA_CL_VS_OUT_CNTL,
-		       prim == PIPE_PRIM_POINTS ? rctx->pa_cl_vs_out_cntl : 0
+		       S_02881C_USE_VTX_POINT_SIZE(vs->vs_out_point_size) |
+		       S_02881C_VS_OUT_MISC_VEC_ENA(vs->vs_out_misc_write)
 		       /*| (rctx->rasterizer->clip_plane_enable &
 		       rctx->vs_shader->shader.clip_dist_write)*/);
 	si_pm4_set_reg(pm4, R_028810_PA_CL_CLIP_CNTL, rctx->pa_cl_clip_cntl
-			/*| (rctx->vs_shader->shader.clip_dist_write ||
+			| (/*rctx->vs_shader->shader.clip_dist_write ||
 			rctx->vs_shader->shader.vs_prohibit_ucps ?
-			0 : rctx->rasterizer->clip_plane_enable & 0x3F)*/);
+			0 :*/ rctx->queued.named.rasterizer->clip_plane_enable & 0x3F));

 	si_pm4_set_state(rctx, draw_info, pm4);
 	return true;
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -1352,25 +1352,25 @@ glXQueryExtension( Display *dpy, int *errorBase, int *eventBase )
 PUBLIC void
 glXDestroyContext( Display *dpy, GLXContext ctx )
 {
-   GLXContext glxCtx = ctx;
-   (void) dpy;
-   MakeCurrent_PrevContext = 0;
-   MakeCurrent_PrevDrawable = 0;
-   MakeCurrent_PrevReadable = 0;
-   MakeCurrent_PrevDrawBuffer = 0;
-   MakeCurrent_PrevReadBuffer = 0;
-   XMesaDestroyContext( glxCtx->xmesaContext );
-   XMesaGarbageCollect();
-   free(glxCtx);
+   if (ctx) {
+      GLXContext glxCtx = ctx;
+      (void) dpy;
+      MakeCurrent_PrevContext = 0;
+      MakeCurrent_PrevDrawable = 0;
+      MakeCurrent_PrevReadable = 0;
+      MakeCurrent_PrevDrawBuffer = 0;
+      MakeCurrent_PrevReadBuffer = 0;
+      XMesaDestroyContext( glxCtx->xmesaContext );
+      XMesaGarbageCollect();
+      free(glxCtx);
+   }
 }


 PUBLIC Bool
 glXIsDirect( Display *dpy, GLXContext ctx )
 {
-   GLXContext glxCtx = ctx;
-   (void) ctx;
-   return glxCtx->isDirect;
+   return ctx ? ctx->isDirect : False;
 }


--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -1238,9 +1238,13 @@ void XMesaSwapBuffers( XMesaBuffer b )
 */
 void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
 {
+   XMesaContext xmctx = XMesaGetCurrentContext();
+
+   xmctx->st->flush( xmctx->st, ST_FLUSH_FRONT, NULL);
+
   xmesa_copy_st_framebuffer(b->stfb,
         ST_ATTACHMENT_BACK_LEFT, ST_ATTACHMENT_FRONT_LEFT,
-         x, y, width, height);
+         x, b->height - y - height, width, height);
 }


--- a/src/gallium/targets/dri-i915/Makefile.am
+++ b/src/gallium/targets/dri-i915/Makefile.am
@@ -62,17 +62,11 @@ i915_dri_la_LIBADD = \
 	$(GALLIUM_DRI_LIB_DEPS) \
 	$(INTEL_LIBS)

-if HAVE_MESA_LLVM
-i915_dri_la_LINK = $(CXXLINK) $(i915_dri_la_LDFLAGS)
-# Mention a dummy pure C++ file to trigger generation of the $(LINK) variable
-nodist_EXTRA_i915_dri_la_SOURCES = dummy-cpp.cpp
+nodist_EXTRA_i915_dri_la_SOURCES = dummy.cpp

+if HAVE_MESA_LLVM
 AM_CPPFLAGS += -DGALLIUM_LLVMPIPE
 i915_dri_la_LIBADD += $(top_builddir)/src/gallium/drivers/llvmpipe/libllvmpipe.la $(LLVM_LIBS)
-else
-i915_dri_la_LINK = $(LINK) $(i915_dri_la_LDFLAGS)
-# Mention a dummy pure C file to trigger generation of the $(LINK) variable
-nodist_EXTRA_i915_dri_la_SOURCES = dummy-c.c
 endif

 # Provide compatibility with scripts for the old Mesa build system for
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -313,6 +313,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
    case CHIP_PITCAIRN:
    case CHIP_VERDE:
    case CHIP_OLAND:
+    case CHIP_HAINAN:
        ws->info.chip_class = TAHITI;
        break;
    }
@@ -381,6 +382,8 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
                                      &ws->info.r600_ib_vm_max_size))
                ws->info.r600_virtual_address = FALSE;
        }
+	if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", TRUE))
+		ws->info.r600_virtual_address = FALSE;
    }

    /* Get max pipes, this is only needed for compute shaders.  All evergreen+
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -124,6 +124,7 @@ enum radeon_family {
    CHIP_PITCAIRN,
    CHIP_VERDE,
    CHIP_OLAND,
+    CHIP_HAINAN,
    CHIP_LAST,
 };

--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -2821,30 +2821,46 @@ ast_declarator_list::hir(exec_list *instructions,
 	 }
      }

-      /* Integer vertex outputs must be qualified with 'flat'.
+      /* Integer fragment inputs must be qualified with 'flat'.  In GLSL ES,
+       * so must integer vertex outputs.
       *
-       * From section 4.3.6 of the GLSL 1.30 spec:
-       *    "If a vertex output is a signed or unsigned integer or integer
-       *    vector, then it must be qualified with the interpolation qualifier
+       * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec:
+       *    "Fragment shader inputs that are signed or unsigned integers or
+       *    integer vectors must be qualified with the interpolation qualifier
       *    flat."
       *
-       * From section 4.3.4 of the GLSL 3.00 ES spec:
+       * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec:
       *    "Fragment shader inputs that are, or contain, signed or unsigned
       *    integers or integer vectors must be qualified with the
       *    interpolation qualifier flat."
       *
-       * Since vertex outputs and fragment inputs must have matching
-       * qualifiers, these two requirements are equivalent.
+       * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec:
+       *    "Vertex shader outputs that are, or contain, signed or unsigned
+       *    integers or integer vectors must be qualified with the
+       *    interpolation qualifier flat."
+       *
+       * Note that prior to GLSL 1.50, this requirement applied to vertex
+       * outputs rather than fragment inputs.  That creates problems in the
+       * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all
+       * desktop GL shaders.  For GLSL ES shaders, we follow the spec and
+       * apply the restriction to both vertex outputs and fragment inputs.
+       *
+       * Note also that the desktop GLSL specs are missing the text "or
+       * contain"; this is presumably an oversight, since there is no
+       * reasonable way to interpolate a fragment shader input that contains
+       * an integer.
       */
-      if (state->is_version(130, 300)
-          && state->target == vertex_shader
-          && state->current_function == NULL
-          && var->type->contains_integer()
-          && var->mode == ir_var_shader_out
-          && var->interpolation != INTERP_QUALIFIER_FLAT) {
-
-         _mesa_glsl_error(&loc, state, "If a vertex output is (or contains) "
-                          "an integer, then it must be qualified with 'flat'");
+      if (state->is_version(130, 300) &&
+          var->type->contains_integer() &&
+          var->interpolation != INTERP_QUALIFIER_FLAT &&
+          ((state->target == fragment_shader && var->mode == ir_var_shader_in)
+           || (state->target == vertex_shader && var->mode == ir_var_shader_out
+               && state->es_shader))) {
+         const char *var_type = (state->target == vertex_shader) ?
+            "vertex output" : "fragment input";
+         _mesa_glsl_error(&loc, state, "If a %s is (or contains) "
+                          "an integer, then it must be qualified with 'flat'",
+                          var_type);
      }


--- a/src/glsl/builtin_variables.cpp
+++ b/src/glsl/builtin_variables.cpp
@@ -535,7 +535,7 @@ generate_common_ES_uniforms(exec_list *instructions,
   add_builtin_constant(instructions, symtab, "gl_MaxTextureImageUnits",
 			state->Const.MaxTextureImageUnits);
   add_builtin_constant(instructions, symtab, "gl_MaxFragmentUniformVectors",
-			state->Const.MaxFragmentUniformComponents);
+			state->Const.MaxFragmentUniformComponents / 4);

   add_uniform(instructions, symtab, "gl_DepthRange",
 	       state->symbols->get_type("gl_DepthRangeParameters"));
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -265,10 +265,16 @@ version_statement:
 	| VERSION_TOK INTCONSTANT EOL
 	{
           state->process_version_directive(&@2, $2, NULL);
+	   if (state->error) {
+	      YYERROR;
+	   }
 	}
        | VERSION_TOK INTCONSTANT any_identifier EOL
        {
           state->process_version_directive(&@2, $2, $3);
+	   if (state->error) {
+	      YYERROR;
+	   }
        }
 	;

--- a/src/glx/XF86dri.c
+++ b/src/glx/XF86dri.c
@@ -43,6 +43,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include <X11/extensions/Xext.h>
 #include <X11/extensions/extutil.h>
 #include "xf86dristr.h"
+#include <limits.h>

 static XExtensionInfo _xf86dri_info_data;
 static XExtensionInfo *xf86dri_info = &_xf86dri_info_data;
@@ -201,7 +202,11 @@ XF86DRIOpenConnection(Display * dpy, int screen, drm_handle_t * hSAREA,
   }

   if (rep.length) {
-      if (!(*busIdString = calloc(rep.busIdStringLength + 1, 1))) {
+      if (rep.busIdStringLength < INT_MAX)
+         *busIdString = calloc(rep.busIdStringLength + 1, 1);
+      else
+         *busIdString = NULL;
+      if (*busIdString == NULL) {
         _XEatData(dpy, ((rep.busIdStringLength + 3) & ~3));
         UnlockDisplay(dpy);
         SyncHandle();
@@ -300,9 +305,11 @@ XF86DRIGetClientDriverName(Display * dpy, int screen,
   *ddxDriverPatchVersion = rep.ddxDriverPatchVersion;

   if (rep.length) {
-      if (!
-          (*clientDriverName =
-           calloc(rep.clientDriverNameLength + 1, 1))) {
+      if (rep.clientDriverNameLength < INT_MAX)
+         *clientDriverName = calloc(rep.clientDriverNameLength + 1, 1);
+      else
+         *clientDriverName = NULL;
+      if (*clientDriverName == NULL) {
         _XEatData(dpy, ((rep.clientDriverNameLength + 3) & ~3));
         UnlockDisplay(dpy);
         SyncHandle();
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -675,22 +675,25 @@

 <!-- 71. GL_OES_vertex_array_object -->
 <category name="GL_OES_vertex_array_object" number="71">
-    <function name="BindVertexArrayOES" alias="BindVertexArray" es2="2.0">
+    <function name="BindVertexArrayOES" alias="BindVertexArray"
+              static_dispatch="false" es2="2.0">
        <param name="array" type="GLuint"/>
    </function>

    <function name="DeleteVertexArraysOES" alias="DeleteVertexArrays"
-              es2="2.0">
+              static_dispatch="false" es2="2.0">
        <param name="n" type="GLsizei"/>
        <param name="arrays" type="const GLuint *" count="n"/>
    </function>

-    <function name="GenVertexArraysOES" alias="GenVertexArrays" es2="2.0">
+    <function name="GenVertexArraysOES" alias="GenVertexArrays"
+              static_dispatch="false" es2="2.0">
        <param name="n" type="GLsizei"/>
        <param name="arrays" type="GLuint *" output="true" count="n"/>
    </function>

-    <function name="IsVertexArrayOES" alias="IsVertexArray" es2="2.0">
+    <function name="IsVertexArrayOES" alias="IsVertexArray"
+              static_dispatch="false" es2="2.0">
        <param name="array" type="GLuint"/>
        <return type="GLboolean"/>
    </function>
@@ -765,7 +768,8 @@
        <size name="Get" mode="get"/>
    </enum>

-    <function name="DrawBuffersNV" alias="DrawBuffers" es2="2.0">
+    <function name="DrawBuffersNV" alias="DrawBuffers"
+              static_dispatch="false" es2="2.0">
        <param name="n" type="GLsizei" counter="true"/>
        <param name="bufs" type="const GLenum *" count="n"/>
    </function>
@@ -773,7 +777,8 @@

 <!-- 93. GL_NV_read_buffer -->
 <category name="GL_NV_read_buffer">
-    <function name="ReadBufferNV" alias="ReadBuffer" es2="2.0">
+    <function name="ReadBufferNV" alias="ReadBuffer"
+              static_dispatch="false" es2="2.0">
        <param name="mode" type="GLenum"/>
    </function>
 </category>
@@ -801,8 +806,8 @@
    <enum name="MAP_FLUSH_EXPLICIT_BIT_EXT"               value="0x0010"/>
    <enum name="MAP_UNSYNCHRONIZED_BIT_EXT"               value="0x0020"/>

-    <function name="MapBufferRangeEXT" alias="MapBufferRange" es1="1.0"
-              es2="2.0">
+    <function name="MapBufferRangeEXT" alias="MapBufferRange"
+              static_dispatch="false" es1="1.0" es2="2.0">
        <param name="target" type="GLenum"/>
        <param name="offset" type="GLintptr"/>
        <param name="size" type="GLsizeiptr"/>
@@ -811,7 +816,7 @@
    </function>

    <function name="FlushMappedBufferRangeEXT" alias="FlushMappedBufferRange"
-              es1="1.0" es2="2.0">
+              static_dispatch="false" es1="1.0" es2="2.0">
        <param name="target" type="GLenum"/>
        <param name="offset" type="GLintptr"/>
        <param name="length" type="GLsizeiptr"/>
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -3364,6 +3364,8 @@ setup_glsl_generate_mipmap(struct gl_context *ctx,
                                   sizeof(struct vertex), OFFSET(x));
      _mesa_VertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE,
                                   sizeof(struct vertex), OFFSET(tex));
+      _mesa_EnableVertexAttribArray(0);
+      _mesa_EnableVertexAttribArray(1);
   }

   /* Generate a fragment shader program appropriate for the texture target */
@@ -3435,8 +3437,6 @@ setup_glsl_generate_mipmap(struct gl_context *ctx,
   _mesa_DeleteObjectARB(vs);
   _mesa_BindAttribLocation(mipmap->ShaderProg, 0, "position");
   _mesa_BindAttribLocation(mipmap->ShaderProg, 1, "texcoords");
-   _mesa_EnableVertexAttribArray(0);
-   _mesa_EnableVertexAttribArray(1);
   link_program_with_debug(ctx, mipmap->ShaderProg);
   sampler->shader_prog = mipmap->ShaderProg;
   ralloc_free(mem_ctx);
--- a/src/mesa/drivers/dri/common/xmlpool/Makefile.am
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile.am
@@ -66,7 +66,7 @@ options.h: t_options.h $(MOS)

 # Update .mo files from the corresponding .po files.
 %/LC_MESSAGES/options.mo: %.po
-	@mo="$@" \
+	@mo="$@"; \
 	lang=$${mo%%/*}; \
 	echo "Updating ($$lang) $@ from $?."; \
 	mkdir -p $$lang/LC_MESSAGES; \
--- a/src/mesa/drivers/dri/i915/i915_state.c
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -33,6 +33,7 @@
 #include "main/fbobject.h"
 #include "main/dd.h"
 #include "main/state.h"
+#include "main/stencil.h"
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"

@@ -63,14 +64,14 @@ i915_update_stencil(struct gl_context * ctx)
    */
   /* _NEW_POLYGON | _NEW_STENCIL */
   if (ctx->Polygon.FrontFace == GL_CW) {
-      front_ref = ctx->Stencil.Ref[0];
+      front_ref = _mesa_get_stencil_ref(ctx, 0);
      front_mask = ctx->Stencil.ValueMask[0];
      front_writemask = ctx->Stencil.WriteMask[0];
      front_func = ctx->Stencil.Function[0];
      front_fail = ctx->Stencil.FailFunc[0];
      front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
      front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
-      back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+      back_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
      back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
      back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
      back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
@@ -78,14 +79,14 @@ i915_update_stencil(struct gl_context * ctx)
      back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
      back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
   } else {
-      front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+      front_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
      front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
      front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
      front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
      front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
      front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
      front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
-      back_ref = ctx->Stencil.Ref[0];
+      back_ref = _mesa_get_stencil_ref(ctx, 0);
      back_mask = ctx->Stencil.ValueMask[0];
      back_writemask = ctx->Stencil.WriteMask[0];
      back_func = ctx->Stencil.Function[0];
--- a/src/mesa/drivers/dri/i965/brw_cc.c
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -35,6 +35,7 @@
 #include "brw_defines.h"
 #include "brw_util.h"
 #include "main/macros.h"
+#include "main/stencil.h"
 #include "intel_batchbuffer.h"

 static void
@@ -116,7 +117,7 @@ static void upload_cc_unit(struct brw_context *brw)
 	 intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
      cc->cc0.stencil_pass_depth_pass_op =
 	 intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
-      cc->cc1.stencil_ref = ctx->Stencil.Ref[0];
+      cc->cc1.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
      cc->cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
      cc->cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];

@@ -130,7 +131,7 @@ static void upload_cc_unit(struct brw_context *brw)
 	    intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
 	 cc->cc0.bf_stencil_pass_depth_pass_op =
 	    intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
-	 cc->cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
+	 cc->cc1.bf_stencil_ref = _mesa_get_stencil_ref(ctx, back);
 	 cc->cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
 	 cc->cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
      }
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -293,6 +293,12 @@ brwCreateContext(int api,
 	 brw->urb.size = 256;
 	 brw->urb.max_vs_entries = 1664;
 	 brw->urb.max_gs_entries = 640;
+      } else if (intel->gt == 3) {
+	 brw->max_wm_threads = 408;
+	 brw->max_vs_threads = 280;
+	 brw->urb.size = 512;
+	 brw->urb.max_vs_entries = 1664;
+	 brw->urb.max_gs_entries = 640;
      }
   } else if (intel->gen == 7) {
      if (intel->gt == 1) {
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -844,15 +844,6 @@ struct brw_context
      GLuint nr_sf_entries;
      GLuint nr_cs_entries;

-      /* gen6:
-       * The length of each URB entry owned by the VS (or GS), as
-       * a number of 1024-bit (128-byte) rows.  Should be >= 1.
-       *
-       * gen7: Same meaning, but in 512-bit (64-byte) rows.
-       */
-      GLuint vs_size;
-      GLuint gs_size;
-
      GLuint vs_start;
      GLuint gs_start;
      GLuint clip_start;
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -263,6 +263,14 @@ get_surface_type(struct intel_context *intel,
         else {
            return ubyte_types_norm[size];
         }
+      case GL_FIXED:
+         if (intel->gen >= 8 || intel->is_haswell)
+            return fixed_point_types[size];
+
+         /* This produces GL_FIXED inputs as values between INT32_MIN and
+          * INT32_MAX, which will be scaled down by 1/65536 by the VS.
+          */
+         return int_types_scale[size];
      /* See GL_ARB_vertex_type_2_10_10_10_rev.
       * W/A: Pre-Haswell, the hardware doesn't really support the formats we'd
       * like to use here, so upload everything as UINT and fix
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -60,6 +60,9 @@ fs_inst::init()
   this->src[0] = reg_undef;
   this->src[1] = reg_undef;
   this->src[2] = reg_undef;
+
+   /* This will be the case for almost all instructions. */
+   this->regs_written = 1;
 }

 fs_inst::fs_inst()
@@ -233,6 +236,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(fs_reg dst, fs_reg surf_index,
   if (intel->gen >= 7) {
      inst = new(mem_ctx) fs_inst(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
                                  dst, surf_index, offset);
+      inst->regs_written = 1;
      instructions.push_tail(inst);
   } else {
      int base_mrf = 13;
@@ -302,26 +306,13 @@ fs_inst::equals(fs_inst *inst)
           offset == inst->offset);
 }

-int
-fs_inst::regs_written()
-{
-   if (is_tex())
-      return 4;
-
-   /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
-    * but we don't currently use them...nor do we have an opcode for them.
-    */
-
-   return 1;
-}
-
 bool
 fs_inst::overwrites_reg(const fs_reg &reg)
 {
   return (reg.file == dst.file &&
           reg.reg == dst.reg &&
           reg.reg_offset >= dst.reg_offset  &&
-           reg.reg_offset < dst.reg_offset + regs_written());
+           reg.reg_offset < dst.reg_offset + regs_written);
 }

 bool
@@ -829,6 +820,7 @@ fs_visitor::import_uniforms(fs_visitor *v)
 			   import_uniforms_callback,
 			   variable_ht);
   this->params_remap = v->params_remap;
+   this->nr_params_remap = v->nr_params_remap;
 }

 /* Our support for uniforms is piggy-backed on the struct
@@ -1368,7 +1360,7 @@ fs_visitor::split_virtual_grfs()
      /* If there's a SEND message that requires contiguous destination
       * registers, no splitting is allowed.
       */
-      if (inst->regs_written() > 1) {
+      if (inst->regs_written > 1) {
 	 split_grf[inst->dst.reg] = false;
      }
   }
@@ -1501,6 +1493,7 @@ fs_visitor::remove_dead_constants()
 {
   if (dispatch_width == 8) {
      this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+      this->nr_params_remap = c->prog_data.nr_params;

      for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
 	 this->params_remap[i] = -1;
@@ -1515,7 +1508,14 @@ fs_visitor::remove_dead_constants()
 	    if (inst->src[i].file != UNIFORM)
 	       continue;

-	    assert(constant_nr < (int)c->prog_data.nr_params);
+	    /* Section 5.11 of the OpenGL 4.3 spec says:
+	     *
+	     *     "Out-of-bounds reads return undefined values, which include
+	     *     values from other variables of the active program or zero."
+	     */
+	    if (constant_nr < 0 || constant_nr >= (int)c->prog_data.nr_params) {
+	       constant_nr = 0;
+	    }

 	    /* For now, set this to non-negative.  We'll give it the
 	     * actual new number in a moment, in order to keep the
@@ -1563,6 +1563,10 @@ fs_visitor::remove_dead_constants()
 	 if (inst->src[i].file != UNIFORM)
 	    continue;

+	 /* As above, alias out-of-bounds constant indices to 0. */
+	 if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) {
+	    constant_nr = 0;
+	 }
 	 assert(this->params_remap[constant_nr] != -1);
 	 inst->src[i].reg = this->params_remap[constant_nr];
 	 inst->src[i].reg_offset = 0;
@@ -2094,7 +2098,7 @@ fs_visitor::compute_to_mrf()
            /* Things returning more than one register would need us to
             * understand coalescing out more than one MOV at a time.
             */
-            if (scan_inst->regs_written() > 1)
+            if (scan_inst->regs_written > 1)
               break;

 	    /* SEND instructions can't have MRF as a destination. */
@@ -2311,7 +2315,7 @@ void
 fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 {
   int reg_size = dispatch_width / 8;
-   int write_len = inst->regs_written() * reg_size;
+   int write_len = inst->regs_written * reg_size;
   int first_write_grf = inst->dst.reg;
   bool needs_dep[BRW_MAX_MRF];
   assert(write_len < (int)sizeof(needs_dep) - 1);
@@ -2351,7 +2355,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
       * dependency has more latency than a MOV.
       */
      if (scan_inst->dst.file == GRF) {
-         for (int i = 0; i < scan_inst->regs_written(); i++) {
+         for (int i = 0; i < scan_inst->regs_written; i++) {
            int reg = scan_inst->dst.reg + i * reg_size;

            if (reg >= first_write_grf &&
@@ -2390,7 +2394,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(fs_inst *inst)
 void
 fs_visitor::insert_gen4_post_send_dependency_workarounds(fs_inst *inst)
 {
-   int write_len = inst->regs_written() * dispatch_width / 8;
+   int write_len = inst->regs_written * dispatch_width / 8;
   int first_write_grf = inst->dst.reg;
   bool needs_dep[BRW_MAX_MRF];
   assert(write_len < (int)sizeof(needs_dep) - 1);
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -174,7 +174,6 @@ public:
           fs_reg src0, fs_reg src1,fs_reg src2);

   bool equals(fs_inst *inst);
-   int regs_written();
   bool overwrites_reg(const fs_reg &reg);
   bool is_tex();
   bool is_math();
@@ -192,6 +191,7 @@ public:
   uint8_t flag_subreg;

   int mlen; /**< SEND message length */
+   int regs_written; /**< Number of vgrfs written by a SEND message, or 1 */
   int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
   uint32_t texture_offset; /**< Texture offset bitfield */
   int sampler;
@@ -440,6 +440,7 @@ public:
    * uniform index.
    */
   int *params_remap;
+   int nr_params_remap;

   struct hash_table *variable_ht;
   fs_reg frag_depth;
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -127,7 +127,7 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 	     */
 	    bool no_existing_temp = entry->tmp.file == BAD_FILE;
 	    if (no_existing_temp) {
-               int written = entry->generator->regs_written();
+               int written = entry->generator->regs_written;

               fs_reg orig_dst = entry->generator->dst;
               fs_reg tmp = fs_reg(GRF, virtual_grf_alloc(written),
@@ -147,8 +147,8 @@ fs_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
 	    }

 	    /* dest <- temp */
-            int written = inst->regs_written();
-            assert(written == entry->generator->regs_written());
+            int written = inst->regs_written;
+            assert(written == entry->generator->regs_written);
            assert(inst->dst.type == entry->tmp.type);
            fs_reg dst = inst->dst;
            fs_reg tmp = entry->tmp;
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -77,7 +77,7 @@ fs_live_variables::setup_def_use()
 	  * variable, and thus qualify for being in def[].
 	  */
 	 if (inst->dst.file == GRF &&
-	     inst->regs_written() == v->virtual_grf_sizes[inst->dst.reg] &&
+	     inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] &&
 	     !inst->predicate &&
 	     !inst->force_uncompressed &&
 	     !inst->force_sechalf) {
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -549,7 +549,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
      }

      if (inst->dst.file == GRF) {
-	 spill_costs[inst->dst.reg] += inst->regs_written() * loop_scale;
+	 spill_costs[inst->dst.reg] += inst->regs_written * loop_scale;

         if (inst->dst.smear >= 0) {
            no_spill[inst->dst.reg] = true;
@@ -618,7 +618,7 @@ fs_visitor::spill_reg(int spill_reg)
 	  inst->dst.reg == spill_reg) {
         int subset_spill_offset = (spill_offset +
                                    REG_SIZE * inst->dst.reg_offset);
-         inst->dst.reg = virtual_grf_alloc(inst->regs_written());
+         inst->dst.reg = virtual_grf_alloc(inst->regs_written);
         inst->dst.reg_offset = 0;

 	 /* If our write is going to affect just part of the
@@ -627,7 +627,7 @@ fs_visitor::spill_reg(int spill_reg)
 	  */
 	 if (inst->predicate || inst->force_uncompressed || inst->force_sechalf) {
            fs_reg unspill_reg = inst->dst;
-            for (int chan = 0; chan < inst->regs_written(); chan++) {
+            for (int chan = 0; chan < inst->regs_written; chan++) {
               emit_unspill(inst, unspill_reg,
                            subset_spill_offset + REG_SIZE * chan);
               unspill_reg.reg_offset++;
@@ -640,7 +640,7 @@ fs_visitor::spill_reg(int spill_reg)
 	 spill_src.negate = false;
 	 spill_src.smear = -1;

-	 for (int chan = 0; chan < inst->regs_written(); chan++) {
+	 for (int chan = 0; chan < inst->regs_written; chan++) {
 	    fs_inst *spill_inst = new(mem_ctx) fs_inst(FS_OPCODE_SPILL,
 						       reg_null_f, spill_src);
 	    spill_src.reg_offset++;
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -510,7 +510,7 @@ instruction_scheduler::calculate_deps()
      /* write-after-write deps. */
      if (inst->dst.file == GRF) {
         if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+            for (int r = 0; r < inst->regs_written * reg_width; r++) {
               add_dep(last_grf_write[inst->dst.reg + r], n);
               last_grf_write[inst->dst.reg + r] = n;
            }
@@ -617,7 +617,7 @@ instruction_scheduler::calculate_deps()
       */
      if (inst->dst.file == GRF) {
         if (post_reg_alloc) {
-            for (int r = 0; r < inst->regs_written() * reg_width; r++)
+            for (int r = 0; r < inst->regs_written * reg_width; r++)
               last_grf_write[inst->dst.reg + r] = n;
         } else {
            last_grf_write[inst->dst.reg] = n;
@@ -716,7 +716,7 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
            schedule_node *n = (schedule_node *)node;

            chosen = n;
-            if (chosen->inst->regs_written() <= 1)
+            if (chosen->inst->regs_written <= 1)
               break;
         }

--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -237,7 +237,7 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
    * src, generate a saturated MOV
    */
   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
-   if (!modify || modify->regs_written() != 1) {
+   if (!modify || modify->regs_written != 1) {
      this->result = fs_reg(this, ir->type);
      fs_inst *inst = emit(MOV(this->result, src));
      inst->saturate = true;
@@ -717,7 +717,7 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
   /* If last_rhs_inst wrote a different number of components than our LHS,
    * we can't safely rewrite it.
    */
-   if (virtual_grf_sizes[dst.reg] != modify->regs_written())
+   if (virtual_grf_sizes[dst.reg] != modify->regs_written)
      return false;

   /* Success!  Rewrite the instruction. */
@@ -917,6 +917,7 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;
+   inst->regs_written = simd16 ? 8 : 4;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
@@ -1046,6 +1047,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
+   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
@@ -1176,6 +1178,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
+   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
@@ -2318,6 +2321,9 @@ fs_visitor::fs_visitor(struct brw_context *brw,
   this->virtual_grf_use = NULL;
   this->live_intervals_valid = false;

+   this->params_remap = NULL;
+   this->nr_params_remap = 0;
+
   this->force_uncompressed_stack = 0;
   this->force_sechalf_stack = 0;

--- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
+++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp
@@ -78,6 +78,7 @@ txs_type(const glsl_type *type)
      break;
   default:
      assert(!"Should not get here: invalid sampler dimensionality");
+      dims = 2;
   }

   if (type->sampler_array)
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -575,7 +575,7 @@ brw_emit_depthbuffer(struct brw_context *brw)
      depth_mt = stencil_mt;
   }

-   if (depth_irb) {
+   if (depth_irb && depth_mt) {
      struct intel_region *region = depth_mt->region;

      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
--- a/src/mesa/drivers/dri/i965/brw_primitive_restart.c
+++ b/src/mesa/drivers/dri/i965/brw_primitive_restart.c
@@ -27,6 +27,7 @@

 #include "main/imports.h"
 #include "main/bufferobj.h"
+#include "main/varray.h"

 #include "brw_context.h"
 #include "brw_defines.h"
@@ -36,29 +37,29 @@

 /**
 * Check if the hardware's cut index support can handle the primitive
- * restart index value.
+ * restart index value (pre-Haswell only).
 */
 static bool
 can_cut_index_handle_restart_index(struct gl_context *ctx,
                                   const struct _mesa_index_buffer *ib)
 {
-   struct intel_context *intel = intel_context(ctx);
-
-   /* Haswell supports an arbitrary cut index. */
-   if (intel->is_haswell)
+   /* The FixedIndex variant means 0xFF, 0xFFFF, or 0xFFFFFFFF based on
+    * the index buffer type, which corresponds exactly to the hardware.
+    */
+   if (ctx->Array.PrimitiveRestartFixedIndex)
      return true;

   bool cut_index_will_work;

   switch (ib->type) {
   case GL_UNSIGNED_BYTE:
-      cut_index_will_work = (ctx->Array._RestartIndex & 0xff) == 0xff;
+      cut_index_will_work = ctx->Array.RestartIndex == 0xff;
      break;
   case GL_UNSIGNED_SHORT:
-      cut_index_will_work = (ctx->Array._RestartIndex & 0xffff) == 0xffff;
+      cut_index_will_work = ctx->Array.RestartIndex == 0xffff;
      break;
   case GL_UNSIGNED_INT:
-      cut_index_will_work = ctx->Array._RestartIndex == 0xffffffff;
+      cut_index_will_work = ctx->Array.RestartIndex == 0xffffffff;
      break;
   default:
      cut_index_will_work = false;
@@ -78,6 +79,7 @@ can_cut_index_handle_prims(struct gl_context *ctx,
                           GLuint nr_prims,
                           const struct _mesa_index_buffer *ib)
 {
+   struct intel_context *intel = intel_context(ctx);
   struct brw_context *brw = brw_context(ctx);

   if (brw->sol.counting_primitives_generated ||
@@ -90,6 +92,10 @@ can_cut_index_handle_prims(struct gl_context *ctx,
      return false;
   }

+   /* Otherwise Haswell can do it all. */
+   if (intel->is_haswell)
+      return true;
+
   if (!can_cut_index_handle_restart_index(ctx, ib)) {
      /* The primitive restart index can't be handled, so take
       * the software path
@@ -198,16 +204,29 @@ haswell_upload_cut_index(struct brw_context *brw)
   const unsigned cut_index_setting =
      ctx->Array._PrimitiveRestart ? HSW_CUT_INDEX_ENABLE : 0;

+   /* BRW_NEW_INDEX_BUFFER */
+   unsigned cut_index;
+   if (brw->ib.ib) {
+      cut_index = _mesa_primitive_restart_index(ctx, brw->ib.type);
+   } else {
+      /* There's no index buffer, but primitive restart may still apply
+       * to glDrawArrays and such.  FIXED_INDEX mode only applies to drawing
+       * operations that use an index buffer, so we can ignore it and use
+       * the GL restart index directly.
+       */
+      cut_index = ctx->Array.RestartIndex;
+   }
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_VF << 16 | cut_index_setting | (2 - 2));
-   OUT_BATCH(ctx->Array._RestartIndex);
+   OUT_BATCH(cut_index);
   ADVANCE_BATCH();
 }

 const struct brw_tracked_state haswell_cut_index = {
   .dirty = {
      .mesa  = _NEW_TRANSFORM,
-      .brw   = 0,
+      .brw   = BRW_NEW_INDEX_BUFFER,
      .cache = 0,
   },
   .emit = haswell_upload_cut_index,
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -239,6 +239,8 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
   case SHADER_OPCODE_SIN:
   case SHADER_OPCODE_COS:
      return 1;
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
   case SHADER_OPCODE_POW:
      return 2;
   case VS_OPCODE_URB_WRITE:
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -33,6 +33,7 @@
 #include "main/macros.h"
 #include "main/enums.h"
 #include "main/glformats.h"
+#include "main/stencil.h"

 static void
 gen6_upload_blend_state(struct brw_context *brw)
@@ -251,8 +252,8 @@ gen6_upload_color_calc_state(struct brw_context *brw)
   UNCLAMPED_FLOAT_TO_UBYTE(cc->cc1.alpha_ref_fi.ui, ctx->Color.AlphaRef);

   /* _NEW_STENCIL */
-   cc->cc0.stencil_ref = ctx->Stencil.Ref[0];
-   cc->cc0.bf_stencil_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+   cc->cc0.stencil_ref = _mesa_get_stencil_ref(ctx, 0);
+   cc->cc0.bf_stencil_ref = _mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);

   /* _NEW_COLOR */
   cc->constant_r = ctx->Color.BlendColorUnclamped[0];
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -54,7 +54,7 @@ gen6_upload_urb( struct brw_context *brw )
   int total_urb_size = brw->urb.size * 1024; /* in bytes */

   /* CACHE_NEW_VS_PROG */
-   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+   unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);

   /* We use the same VUE layout for VS outputs and GS outputs (as it's what
    * the SF and Clipper expect), so we can simply make the GS URB entry size
@@ -62,14 +62,14 @@ gen6_upload_urb( struct brw_context *brw )
    * where we have few vertex attributes and a lot of varyings, since the VS
    * size is determined by the larger of the two.  For now, it's safe.
    */
-   brw->urb.gs_size = brw->urb.vs_size;
+   unsigned gs_size = vs_size;

   /* Calculate how many entries fit in each stage's section of the URB */
   if (brw->gs.prog_active) {
-      nr_vs_entries = (total_urb_size/2) / (brw->urb.vs_size * 128);
-      nr_gs_entries = (total_urb_size/2) / (brw->urb.gs_size * 128);
+      nr_vs_entries = (total_urb_size/2) / (vs_size * 128);
+      nr_gs_entries = (total_urb_size/2) / (gs_size * 128);
   } else {
-      nr_vs_entries = total_urb_size / (brw->urb.vs_size * 128);
+      nr_vs_entries = total_urb_size / (vs_size * 128);
      nr_gs_entries = 0;
   }

@@ -87,14 +87,14 @@ gen6_upload_urb( struct brw_context *brw )
   assert(brw->urb.nr_vs_entries >= 24);
   assert(brw->urb.nr_vs_entries % 4 == 0);
   assert(brw->urb.nr_gs_entries % 4 == 0);
-   assert(brw->urb.vs_size < 5);
-   assert(brw->urb.gs_size < 5);
+   assert(vs_size < 5);
+   assert(gs_size < 5);

   BEGIN_BATCH(3);
   OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
-   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
+   OUT_BATCH(((vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
 	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
-   OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+   OUT_BATCH(((gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
 	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
   ADVANCE_BATCH();

--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -39,29 +39,37 @@
 * +-------------------------------------------------------------+
 *
 * Notably, push constants must be stored at the beginning of the URB
- * space, while entries can be stored anywhere.  Ivybridge has a maximum
- * constant buffer size of 16kB.
+ * space, while entries can be stored anywhere.  Ivybridge and Haswell
+ * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
+ * doubles this (32kB).
 *
 * Currently we split the constant buffer space evenly between VS and FS.
 * This is probably not ideal, but simple.
 *
- * Ivybridge GT1 has 128kB of URB space.
- * Ivybridge GT2 has 256kB of URB space.
+ * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
+ * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
+ * Haswell GT3 has 512kB of URB space.
 *
- * See "Volume 2a: 3D Pipeline," section 1.8.
+ * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
+ * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
 */
 void
 gen7_allocate_push_constants(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
+
+   unsigned size = 8;
+   if (intel->is_haswell && intel->gt == 3)
+      size = 16;
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
-   OUT_BATCH(8);
+   OUT_BATCH(size);
   ADVANCE_BATCH();

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
-   OUT_BATCH(8 | 8 << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
+   OUT_BATCH(size | size << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
   ADVANCE_BATCH();
 }

@@ -78,13 +86,15 @@ static void
 gen7_upload_urb(struct brw_context *brw)
 {
   struct intel_context *intel = &brw->intel;
+   const int push_size_kB = intel->is_haswell && intel->gt == 3 ? 32 : 16;
+
   /* Total space for entries is URB size - push constant space (16kB or 32kB) */
-   int handle_region_size = (brw->urb.size - 16) * 1024; /* bytes */
+   int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */

   /* CACHE_NEW_VS_PROG */
-   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+   unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);

-   int nr_vs_entries = handle_region_size / (brw->urb.vs_size * 64);
+   int nr_vs_entries = handle_region_size / (vs_size * 64);
   if (nr_vs_entries > brw->urb.max_vs_entries)
      nr_vs_entries = brw->urb.max_vs_entries;

@@ -92,7 +102,7 @@ gen7_upload_urb(struct brw_context *brw)
   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8);

   /* URB Starting Addresses are specified in multiples of 8kB. */
-   brw->urb.vs_start = 2; /* skip over push constants */
+   brw->urb.vs_start = push_size_kB / 8; /* skip over push constants */

   assert(brw->urb.nr_vs_entries % 8 == 0);
   assert(brw->urb.nr_gs_entries % 8 == 0);
@@ -100,8 +110,7 @@ gen7_upload_urb(struct brw_context *brw)
   assert(!brw->gs.prog_active);

   gen7_emit_vs_workaround_flush(intel);
-   gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, brw->urb.vs_size,
-                       brw->urb.vs_start);
+   gen7_emit_urb_state(brw, brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start);
 }

 void
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -87,42 +87,72 @@
 #define PCI_CHIP_IVYBRIDGE_S_GT1        0x015a  /* Server */
 #define PCI_CHIP_IVYBRIDGE_S_GT2        0x016a

+#define PCI_CHIP_BAYTRAIL_M_1           0x0F31
+#define PCI_CHIP_BAYTRAIL_M_2           0x0F32
+#define PCI_CHIP_BAYTRAIL_M_3           0x0F33
+#define PCI_CHIP_BAYTRAIL_M_4           0x0157
+#define PCI_CHIP_BAYTRAIL_D             0x0155
+
 #define PCI_CHIP_HASWELL_GT1            0x0402 /* Desktop */
 #define PCI_CHIP_HASWELL_GT2            0x0412
-#define PCI_CHIP_HASWELL_GT2_PLUS       0x0422
+#define PCI_CHIP_HASWELL_GT3            0x0422
 #define PCI_CHIP_HASWELL_M_GT1          0x0406 /* Mobile */
 #define PCI_CHIP_HASWELL_M_GT2          0x0416
-#define PCI_CHIP_HASWELL_M_GT2_PLUS     0x0426
+#define PCI_CHIP_HASWELL_M_GT3          0x0426
 #define PCI_CHIP_HASWELL_S_GT1          0x040A /* Server */
 #define PCI_CHIP_HASWELL_S_GT2          0x041A
-#define PCI_CHIP_HASWELL_S_GT2_PLUS     0x042A
+#define PCI_CHIP_HASWELL_S_GT3          0x042A
+#define PCI_CHIP_HASWELL_B_GT1          0x040B /* Reserved */
+#define PCI_CHIP_HASWELL_B_GT2          0x041B
+#define PCI_CHIP_HASWELL_B_GT3          0x042B
+#define PCI_CHIP_HASWELL_E_GT1          0x040E /* Reserved */
+#define PCI_CHIP_HASWELL_E_GT2          0x041E
+#define PCI_CHIP_HASWELL_E_GT3          0x042E
 #define PCI_CHIP_HASWELL_SDV_GT1        0x0C02 /* Desktop */
 #define PCI_CHIP_HASWELL_SDV_GT2        0x0C12
-#define PCI_CHIP_HASWELL_SDV_GT2_PLUS   0x0C22
+#define PCI_CHIP_HASWELL_SDV_GT3        0x0C22
 #define PCI_CHIP_HASWELL_SDV_M_GT1      0x0C06 /* Mobile */
 #define PCI_CHIP_HASWELL_SDV_M_GT2      0x0C16
-#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
+#define PCI_CHIP_HASWELL_SDV_M_GT3      0x0C26
 #define PCI_CHIP_HASWELL_SDV_S_GT1      0x0C0A /* Server */
 #define PCI_CHIP_HASWELL_SDV_S_GT2      0x0C1A
-#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
+#define PCI_CHIP_HASWELL_SDV_S_GT3      0x0C2A
+#define PCI_CHIP_HASWELL_SDV_B_GT1      0x0C0B /* Reserved */
+#define PCI_CHIP_HASWELL_SDV_B_GT2      0x0C1B
+#define PCI_CHIP_HASWELL_SDV_B_GT3      0x0C2B
+#define PCI_CHIP_HASWELL_SDV_E_GT1      0x0C0E /* Reserved */
+#define PCI_CHIP_HASWELL_SDV_E_GT2      0x0C1E
+#define PCI_CHIP_HASWELL_SDV_E_GT3      0x0C2E
 #define PCI_CHIP_HASWELL_ULT_GT1        0x0A02 /* Desktop */
 #define PCI_CHIP_HASWELL_ULT_GT2        0x0A12
-#define PCI_CHIP_HASWELL_ULT_GT2_PLUS   0x0A22
+#define PCI_CHIP_HASWELL_ULT_GT3        0x0A22
 #define PCI_CHIP_HASWELL_ULT_M_GT1      0x0A06 /* Mobile */
 #define PCI_CHIP_HASWELL_ULT_M_GT2      0x0A16
-#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
+#define PCI_CHIP_HASWELL_ULT_M_GT3      0x0A26
 #define PCI_CHIP_HASWELL_ULT_S_GT1      0x0A0A /* Server */
 #define PCI_CHIP_HASWELL_ULT_S_GT2      0x0A1A
-#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
+#define PCI_CHIP_HASWELL_ULT_S_GT3      0x0A2A
+#define PCI_CHIP_HASWELL_ULT_B_GT1      0x0A0B /* Reserved */
+#define PCI_CHIP_HASWELL_ULT_B_GT2      0x0A1B
+#define PCI_CHIP_HASWELL_ULT_B_GT3      0x0A2B
+#define PCI_CHIP_HASWELL_ULT_E_GT1      0x0A0E /* Reserved */
+#define PCI_CHIP_HASWELL_ULT_E_GT2      0x0A1E
+#define PCI_CHIP_HASWELL_ULT_E_GT3      0x0A2E
 #define PCI_CHIP_HASWELL_CRW_GT1        0x0D02 /* Desktop */
 #define PCI_CHIP_HASWELL_CRW_GT2        0x0D12
-#define PCI_CHIP_HASWELL_CRW_GT2_PLUS   0x0D22
+#define PCI_CHIP_HASWELL_CRW_GT3        0x0D22
 #define PCI_CHIP_HASWELL_CRW_M_GT1      0x0D06 /* Mobile */
 #define PCI_CHIP_HASWELL_CRW_M_GT2      0x0D16
-#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D26
+#define PCI_CHIP_HASWELL_CRW_M_GT3      0x0D26
 #define PCI_CHIP_HASWELL_CRW_S_GT1      0x0D0A /* Server */
 #define PCI_CHIP_HASWELL_CRW_S_GT2      0x0D1A
-#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D2A
+#define PCI_CHIP_HASWELL_CRW_S_GT3      0x0D2A
+#define PCI_CHIP_HASWELL_CRW_B_GT1      0x0D0B /* Reserved */
+#define PCI_CHIP_HASWELL_CRW_B_GT2      0x0D1B
+#define PCI_CHIP_HASWELL_CRW_B_GT3      0x0D2B
+#define PCI_CHIP_HASWELL_CRW_E_GT1      0x0D0E /* Reserved */
+#define PCI_CHIP_HASWELL_CRW_E_GT2      0x0D1E
+#define PCI_CHIP_HASWELL_CRW_E_GT3      0x0D2E

 #define IS_MOBILE(devid)	(devid == PCI_CHIP_I855_GM || \
 				 devid == PCI_CHIP_I915_GM || \
@@ -190,47 +220,80 @@

 #define IS_IVYBRIDGE(devid)     (IS_IVB_GT1(devid) || IS_IVB_GT2(devid))

+#define IS_BAYTRAIL(devid)      (devid == PCI_CHIP_BAYTRAIL_M_1 || \
+                                 devid == PCI_CHIP_BAYTRAIL_M_2 || \
+                                 devid == PCI_CHIP_BAYTRAIL_M_3 || \
+                                 devid == PCI_CHIP_BAYTRAIL_M_4 || \
+                                 devid == PCI_CHIP_BAYTRAIL_D)
+
 #define IS_GEN7(devid)	        (IS_IVYBRIDGE(devid) || \
+				 IS_BAYTRAIL(devid) || \
 				 IS_HASWELL(devid))

 #define IS_HSW_GT1(devid)	(devid == PCI_CHIP_HASWELL_GT1 || \
 				 devid == PCI_CHIP_HASWELL_M_GT1 || \
 				 devid == PCI_CHIP_HASWELL_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_E_GT1 || \
 				 devid == PCI_CHIP_HASWELL_SDV_GT1 || \
 				 devid == PCI_CHIP_HASWELL_SDV_M_GT1 || \
 				 devid == PCI_CHIP_HASWELL_SDV_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_SDV_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_SDV_E_GT1 || \
 				 devid == PCI_CHIP_HASWELL_ULT_GT1 || \
 				 devid == PCI_CHIP_HASWELL_ULT_M_GT1 || \
 				 devid == PCI_CHIP_HASWELL_ULT_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_ULT_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_ULT_E_GT1 || \
 				 devid == PCI_CHIP_HASWELL_CRW_GT1 || \
 				 devid == PCI_CHIP_HASWELL_CRW_M_GT1 || \
-				 devid == PCI_CHIP_HASWELL_CRW_S_GT1)
+				 devid == PCI_CHIP_HASWELL_CRW_S_GT1 || \
+				 devid == PCI_CHIP_HASWELL_CRW_B_GT1 || \
+				 devid == PCI_CHIP_HASWELL_CRW_E_GT1)
 #define IS_HSW_GT2(devid)	(devid == PCI_CHIP_HASWELL_GT2 || \
 				 devid == PCI_CHIP_HASWELL_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_S_GT2 || \
+				 devid == PCI_CHIP_HASWELL_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_E_GT2 || \
 				 devid == PCI_CHIP_HASWELL_SDV_GT2 || \
 				 devid == PCI_CHIP_HASWELL_SDV_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_SDV_S_GT2 || \
+				 devid == PCI_CHIP_HASWELL_SDV_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_SDV_E_GT2 || \
 				 devid == PCI_CHIP_HASWELL_ULT_GT2 || \
 				 devid == PCI_CHIP_HASWELL_ULT_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_ULT_S_GT2 || \
+				 devid == PCI_CHIP_HASWELL_ULT_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_ULT_E_GT2 || \
 				 devid == PCI_CHIP_HASWELL_CRW_GT2 || \
 				 devid == PCI_CHIP_HASWELL_CRW_M_GT2 || \
 				 devid == PCI_CHIP_HASWELL_CRW_S_GT2 || \
-				 devid == PCI_CHIP_HASWELL_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_S_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_SDV_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_SDV_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_SDV_S_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_ULT_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_ULT_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_ULT_S_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_CRW_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_CRW_M_GT2_PLUS || \
-				 devid == PCI_CHIP_HASWELL_CRW_S_GT2_PLUS)
+				 devid == PCI_CHIP_HASWELL_CRW_B_GT2 || \
+				 devid == PCI_CHIP_HASWELL_CRW_E_GT2)
+#define IS_HSW_GT3(devid)	(devid == PCI_CHIP_HASWELL_GT3 || \
+				 devid == PCI_CHIP_HASWELL_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_E_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_SDV_E_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_ULT_E_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_M_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_S_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_B_GT3 || \
+				 devid == PCI_CHIP_HASWELL_CRW_E_GT3)

 #define IS_HASWELL(devid)       (IS_HSW_GT1(devid) || \
-				 IS_HSW_GT2(devid))
+				 IS_HSW_GT2(devid) || \
+				 IS_HSW_GT3(devid))

 #define IS_965(devid)		(IS_GEN4(devid) || \
 				 IS_G4X(devid) || \
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -186,48 +186,81 @@ intelGetString(struct gl_context * ctx, GLenum name)
      case PCI_CHIP_IVYBRIDGE_S_GT2:
 	 chipset = "Intel(R) Ivybridge Server";
 	 break;
+      case PCI_CHIP_BAYTRAIL_M_1:
+      case PCI_CHIP_BAYTRAIL_M_2:
+      case PCI_CHIP_BAYTRAIL_M_3:
+      case PCI_CHIP_BAYTRAIL_M_4:
+      case PCI_CHIP_BAYTRAIL_D:
+         chipset = "Intel(R) Bay Trail";
+         break;
      case PCI_CHIP_HASWELL_GT1:
      case PCI_CHIP_HASWELL_GT2:
-      case PCI_CHIP_HASWELL_GT2_PLUS:
+      case PCI_CHIP_HASWELL_GT3:
      case PCI_CHIP_HASWELL_SDV_GT1:
      case PCI_CHIP_HASWELL_SDV_GT2:
-      case PCI_CHIP_HASWELL_SDV_GT2_PLUS:
+      case PCI_CHIP_HASWELL_SDV_GT3:
      case PCI_CHIP_HASWELL_ULT_GT1:
      case PCI_CHIP_HASWELL_ULT_GT2:
-      case PCI_CHIP_HASWELL_ULT_GT2_PLUS:
+      case PCI_CHIP_HASWELL_ULT_GT3:
      case PCI_CHIP_HASWELL_CRW_GT1:
      case PCI_CHIP_HASWELL_CRW_GT2:
-      case PCI_CHIP_HASWELL_CRW_GT2_PLUS:
+      case PCI_CHIP_HASWELL_CRW_GT3:
 	 chipset = "Intel(R) Haswell Desktop";
 	 break;
      case PCI_CHIP_HASWELL_M_GT1:
      case PCI_CHIP_HASWELL_M_GT2:
-      case PCI_CHIP_HASWELL_M_GT2_PLUS:
+      case PCI_CHIP_HASWELL_M_GT3:
      case PCI_CHIP_HASWELL_SDV_M_GT1:
      case PCI_CHIP_HASWELL_SDV_M_GT2:
-      case PCI_CHIP_HASWELL_SDV_M_GT2_PLUS:
+      case PCI_CHIP_HASWELL_SDV_M_GT3:
      case PCI_CHIP_HASWELL_ULT_M_GT1:
      case PCI_CHIP_HASWELL_ULT_M_GT2:
-      case PCI_CHIP_HASWELL_ULT_M_GT2_PLUS:
+      case PCI_CHIP_HASWELL_ULT_M_GT3:
      case PCI_CHIP_HASWELL_CRW_M_GT1:
      case PCI_CHIP_HASWELL_CRW_M_GT2:
-      case PCI_CHIP_HASWELL_CRW_M_GT2_PLUS:
+      case PCI_CHIP_HASWELL_CRW_M_GT3:
 	 chipset = "Intel(R) Haswell Mobile";
 	 break;
      case PCI_CHIP_HASWELL_S_GT1:
      case PCI_CHIP_HASWELL_S_GT2:
-      case PCI_CHIP_HASWELL_S_GT2_PLUS:
+      case PCI_CHIP_HASWELL_S_GT3:
      case PCI_CHIP_HASWELL_SDV_S_GT1:
      case PCI_CHIP_HASWELL_SDV_S_GT2:
-      case PCI_CHIP_HASWELL_SDV_S_GT2_PLUS:
+      case PCI_CHIP_HASWELL_SDV_S_GT3:
      case PCI_CHIP_HASWELL_ULT_S_GT1:
      case PCI_CHIP_HASWELL_ULT_S_GT2:
-      case PCI_CHIP_HASWELL_ULT_S_GT2_PLUS:
+      case PCI_CHIP_HASWELL_ULT_S_GT3:
      case PCI_CHIP_HASWELL_CRW_S_GT1:
      case PCI_CHIP_HASWELL_CRW_S_GT2:
-      case PCI_CHIP_HASWELL_CRW_S_GT2_PLUS:
+      case PCI_CHIP_HASWELL_CRW_S_GT3:
 	 chipset = "Intel(R) Haswell Server";
 	 break;
+      case PCI_CHIP_HASWELL_B_GT1:
+      case PCI_CHIP_HASWELL_B_GT2:
+      case PCI_CHIP_HASWELL_B_GT3:
+      case PCI_CHIP_HASWELL_SDV_B_GT1:
+      case PCI_CHIP_HASWELL_SDV_B_GT2:
+      case PCI_CHIP_HASWELL_SDV_B_GT3:
+      case PCI_CHIP_HASWELL_ULT_B_GT1:
+      case PCI_CHIP_HASWELL_ULT_B_GT2:
+      case PCI_CHIP_HASWELL_ULT_B_GT3:
+      case PCI_CHIP_HASWELL_CRW_B_GT1:
+      case PCI_CHIP_HASWELL_CRW_B_GT2:
+      case PCI_CHIP_HASWELL_CRW_B_GT3:
+      case PCI_CHIP_HASWELL_E_GT1:
+      case PCI_CHIP_HASWELL_E_GT2:
+      case PCI_CHIP_HASWELL_E_GT3:
+      case PCI_CHIP_HASWELL_SDV_E_GT1:
+      case PCI_CHIP_HASWELL_SDV_E_GT2:
+      case PCI_CHIP_HASWELL_SDV_E_GT3:
+      case PCI_CHIP_HASWELL_ULT_E_GT1:
+      case PCI_CHIP_HASWELL_ULT_E_GT2:
+      case PCI_CHIP_HASWELL_ULT_E_GT3:
+      case PCI_CHIP_HASWELL_CRW_E_GT1:
+      case PCI_CHIP_HASWELL_CRW_E_GT2:
+      case PCI_CHIP_HASWELL_CRW_E_GT3:
+         chipset = "Intel(R) Haswell";
+         break;
      default:
         chipset = "Unknown Intel Chipset";
         break;
@@ -679,11 +712,16 @@ intelInitContext(struct intel_context *intel,
      intel->gt = 1;
   else if (IS_SNB_GT2(devID) || IS_IVB_GT2(devID) || IS_HSW_GT2(devID))
      intel->gt = 2;
+   else if (IS_HSW_GT3(devID))
+      intel->gt = 3;
   else
      intel->gt = 0;

   if (IS_HASWELL(devID)) {
      intel->is_haswell = true;
+   } else if (IS_BAYTRAIL(devID)) {
+      intel->is_baytrail = true;
+      intel->gt = 1;
   } else if (IS_G4X(devID)) {
      intel->is_g4x = true;
   } else if (IS_945(devID)) {
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -235,6 +235,7 @@ struct intel_context
   int gt;
   bool needs_ff_sync;
   bool is_haswell;
+   bool is_baytrail;
   bool is_g4x;
   bool is_945;
   bool has_separate_stencil;
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -321,6 +321,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
   rb->Format = image->format;
   rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx,
 					   image->internal_format);
+   rb->NeedsFinishRenderTexture = true;
 }

 /**
@@ -633,28 +634,13 @@ intel_render_texture(struct gl_context * ctx,
      /* Fallback on drawing to a texture that doesn't have a miptree
       * (has a border, width/height 0, etc.)
       */
-      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
      _swrast_render_texture(ctx, fb, att);
      return;
   }
-   else if (!irb) {
-      intel_miptree_check_level_layer(mt, att->TextureLevel, layer);

-      irb = (struct intel_renderbuffer *)intel_new_renderbuffer(ctx, ~0);
-
-      if (irb) {
-         /* bind the wrapper to the attachment point */
-         _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base.Base);
-      }
-      else {
-         /* fallback to software rendering */
-         _swrast_render_texture(ctx, fb, att);
-         return;
-      }
-   }
+   intel_miptree_check_level_layer(mt, att->TextureLevel, layer);

   if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) {
-       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
       _swrast_render_texture(ctx, fb, att);
       return;
   }
@@ -679,13 +665,10 @@ intel_finish_render_texture(struct gl_context * ctx,
                            struct gl_renderbuffer_attachment *att)
 {
   struct intel_context *intel = intel_context(ctx);
-   struct gl_texture_object *tex_obj = att->Texture;
-   struct gl_texture_image *image =
-      tex_obj->Image[att->CubeMapFace][att->TextureLevel];
-   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
+   struct gl_renderbuffer *rb = att->Renderbuffer;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);

-   DBG("Finish render %s texture tex=%u\n",
-       _mesa_get_format_name(image->TexFormat), att->Texture->Name);
+   DBG("Finish render %s texture\n", _mesa_get_format_name(rb->Format));

   if (irb)
      irb->tex_image = NULL;
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -32,6 +32,7 @@
 #include <assert.h>
 #include "main/formats.h"
 #include "intel_context.h"
+#include "intel_mipmap_tree.h"
 #include "intel_screen.h"

 #ifdef __cplusplus
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -125,6 +125,9 @@ intel_miptree_create_internal(struct intel_context *intel,
                              GLuint num_samples)
 {
   struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
+   if (!mt)
+      return NULL;
+
   int compress_byte = 0;

   DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
@@ -338,6 +341,18 @@ intel_miptree_create(struct intel_context *intel,
   etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
   base_format = _mesa_get_format_base_format(format);

+   mt = intel_miptree_create_internal(intel, target, format,
+				      first_level, last_level, width0,
+				      height0, depth0,
+				      false, num_samples);
+   /*
+    * pitch == 0 || height == 0  indicates the null texture
+    */
+   if (!mt || !mt->total_width || !mt->total_height) {
+      intel_miptree_release(&mt);
+      return NULL;
+   }
+
   if (num_samples > 1) {
      /* From p82 of the Sandy Bridge PRM, dw3[1] of SURFACE_STATE ("Tiled
       * Surface"):
@@ -361,20 +376,15 @@ intel_miptree_create(struct intel_context *intel,
 	 tiling = I915_TILING_Y;
      else if (force_y_tiling) {
         tiling = I915_TILING_Y;
-      } else if (width0 >= 64)
-	 tiling = I915_TILING_X;
-   }
-
-   mt = intel_miptree_create_internal(intel, target, format,
-				      first_level, last_level, width0,
-				      height0, depth0,
-				      false, num_samples);
-   /*
-    * pitch == 0 || height == 0  indicates the null texture
-    */
-   if (!mt || !mt->total_width || !mt->total_height) {
-      intel_miptree_release(&mt);
-      return NULL;
+      } else if (width0 >= 64) {
+         if (ALIGN(mt->total_width * mt->cpp, 512) < 32768) {
+            tiling = I915_TILING_X;
+         } else {
+            perf_debug("%dx%d miptree too large to blit, "
+                       "falling back to untiled",
+                       mt->total_width, mt->total_height);
+         }
+      }
   }

   total_width = mt->total_width;
@@ -1212,9 +1222,30 @@ intel_miptree_unmap_blit(struct intel_context *intel,
 			 unsigned int level,
 			 unsigned int slice)
 {
-   assert(!(map->mode & GL_MAP_WRITE_BIT));
-
+   struct gl_context *ctx = &intel->ctx;
   drm_intel_bo_unmap(map->bo);
+
+   if (map->mode & GL_MAP_WRITE_BIT) {
+      unsigned int image_x, image_y;
+      int x = map->x;
+      int y = map->y;
+      intel_miptree_get_image_offset(mt, level, slice, &image_x, &image_y);
+      x += image_x;
+      y += image_y;
+
+      bool ok = intelEmitCopyBlit(intel,
+                                  mt->region->cpp,
+                                  map->stride, map->bo,
+                                  0, I915_TILING_NONE,
+                                  mt->region->pitch, mt->region->bo,
+                                  mt->offset, mt->region->tiling,
+                                  0, 0,
+                                  x, y,
+                                  map->w, map->h,
+                                  GL_COPY);
+      WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
+   }
+
   drm_intel_bo_unreference(map->bo);
 }

@@ -1551,6 +1582,23 @@ intel_miptree_map_singlesample(struct intel_context *intel,
 {
   struct intel_miptree_map *map;

+   /* Estimate the size of the mappable aperture into the GTT.  There's an
+    * ioctl to get the whole GTT size, but not one to get the mappable subset.
+    * It turns out it's basically always 256MB, though some ancient hardware
+    * was smaller.
+    */
+   uint32_t gtt_size = 256 * 1024 * 1024;
+   if (intel->gen == 2)
+      gtt_size = 128 * 1024 * 1024;
+
+   /* We don't want to map two objects such that a memcpy between them would
+    * just fault one mapping in and then the other over and over forever.  So
+    * we would need to divide the GTT size by 2.  Additionally, some GTT is
+    * taken up by things like the framebuffer and the ringbuffer and such, so
+    * be more conservative.
+    */
+   uint32_t max_gtt_map_object_size = gtt_size / 4;
+
   assert(mt->num_samples <= 1);

   map = intel_miptree_attach_map(mt, level, slice, x, y, w, h, mode);
@@ -1596,6 +1644,10 @@ intel_miptree_map_singlesample(struct intel_context *intel,
            mt->region->tiling == I915_TILING_X &&
            mt->region->pitch < 32768) {
      intel_miptree_map_blit(intel, mt, map, level, slice);
+   } else if (mt->region->tiling != I915_TILING_NONE &&
+              mt->region->bo->size >= max_gtt_map_object_size) {
+      assert(mt->region->pitch < 32768);
+      intel_miptree_map_blit(intel, mt, map, level, slice);
   } else {
      intel_miptree_map_gtt(intel, mt, map, level, slice);
   }
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -45,6 +45,7 @@
 #include "intel_context.h"
 #include "intel_batchbuffer.h"
 #include "intel_blit.h"
+#include "intel_fbo.h"
 #include "intel_regions.h"
 #include "intel_buffers.h"
 #include "intel_pixel.h"
@@ -176,8 +177,8 @@ do_blit_bitmap( struct gl_context *ctx,
 		const GLubyte *bitmap )
 {
   struct intel_context *intel = intel_context(ctx);
-   struct intel_region *dst;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct intel_renderbuffer *irb;
   GLfloat tmpColor[4];
   GLubyte ubcolor[4];
   GLuint color;
@@ -200,10 +201,14 @@ do_blit_bitmap( struct gl_context *ctx,
   }

   intel_prepare_render(intel);
-   dst = intel_drawbuf_region(intel);

-   if (!dst)
-       return false;
+   if (fb->_NumColorDrawBuffers != 1) {
+      perf_debug("accelerated glBitmap() only supports rendering to a "
+                 "single color buffer\n");
+      return false;
+   }
+
+   irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);

   if (_mesa_is_bufferobj(unpack->BufferObj)) {
      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
@@ -222,10 +227,19 @@ do_blit_bitmap( struct gl_context *ctx,
   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);

-   if (dst->cpp == 2)
-      color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
-   else
+   switch (irb->mt->format) {
+   case MESA_FORMAT_ARGB8888:
+   case MESA_FORMAT_XRGB8888:
      color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
+      break;
+   case MESA_FORMAT_RGB565:
+      color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
+      break;
+   default:
+      perf_debug("Unsupported format %s in accelerated glBitmap()\n",
+                 _mesa_get_format_name(irb->mt->format));
+      return false;
+   }

   if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
      return false;
@@ -270,14 +284,14 @@ do_blit_bitmap( struct gl_context *ctx,
 	    continue;

 	 if (!intelEmitImmediateColorExpandBlit(intel,
-						dst->cpp,
+						irb->mt->cpp,
 						(GLubyte *)stipple,
 						sz,
 						color,
-						dst->pitch,
-						dst->bo,
+						irb->mt->region->pitch,
+						irb->mt->region->bo,
 						0,
-						dst->tiling,
+						irb->mt->region->tiling,
 						dstx + px,
 						dsty + py,
 						w, h,
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -306,6 +306,9 @@ intel_create_image_from_name(__DRIscreen *screen,
    int cpp;

    image = intel_allocate_image(format, loaderPrivate);
+    if (image == NULL)
+       return NULL;
+
    if (image->format == MESA_FORMAT_NONE)
       cpp = 1;
    else
@@ -369,6 +372,7 @@ intel_create_image_from_renderbuffer(__DRIcontext *context,
      break;
   }

+   rb->NeedsFinishRenderTexture = true;
   return image;
 }

@@ -398,6 +402,9 @@ intel_create_image(__DRIscreen *screen,
   }

   image = intel_allocate_image(format, loaderPrivate);
+   if (image == NULL)
+      return NULL;
+
   cpp = _mesa_get_format_bytes(image->format);
   image->region =
      intel_region_alloc(intelScreen, tiling, cpp, width, height, true);
@@ -544,6 +551,9 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)
    stride = parent->strides[index];

    image = intel_allocate_image(dri_format, loaderPrivate);
+    if (image == NULL)
+       return NULL;
+
    if (offset + height * stride > parent->region->bo->size) {
       _mesa_warning(NULL, "intel_create_sub_image: subimage out of bounds");
       free(image);
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -271,16 +271,6 @@ nouveau_render_texture(struct gl_context *ctx, struct gl_framebuffer *fb,
 	struct gl_texture_image *ti =
 		att->Texture->Image[att->CubeMapFace][att->TextureLevel];

-	/* Allocate a renderbuffer object for the texture if we
-	 * haven't already done so. */
-	if (!rb) {
-		rb = nouveau_renderbuffer_new(ctx, ~0);
-		assert(rb);
-
-		rb->AllocStorage = NULL;
-		_mesa_reference_renderbuffer(&att->Renderbuffer, rb);
-	}
-
 	/* Update the renderbuffer fields from the texture. */
 	set_renderbuffer_format(rb, get_tex_format(ti));
 	rb->Width = ti->Width;
--- a/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
@@ -30,6 +30,7 @@
 #include "nv_object.xml.h"
 #include "nv04_3d.xml.h"
 #include "nv04_driver.h"
+#include "main/stencil.h"

 static unsigned
 get_comparison_op(unsigned op)
@@ -177,7 +178,7 @@ nv04_emit_control(struct gl_context *ctx, int emit)
 		nv04->ctrl[1] |= NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_ENABLE;

 	nv04->ctrl[1] |= get_comparison_op(ctx->Stencil.Function[0]) << 4 |
-			 ctx->Stencil.Ref[0] << 8 |
+			 _mesa_get_stencil_ref(ctx, 0) << 8 |
 			 ctx->Stencil.ValueMask[0] << 16 |
 			 ctx->Stencil.WriteMask[0] << 24;

--- a/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
@@ -31,6 +31,8 @@
 #include "nv10_3d.xml.h"
 #include "nv10_driver.h"

+#include "main/stencil.h"
+
 void
 nv10_emit_alpha_func(struct gl_context *ctx, int emit)
 {
@@ -145,7 +147,7 @@ nv10_emit_stencil_func(struct gl_context *ctx, int emit)

 	BEGIN_NV04(push, NV10_3D(STENCIL_FUNC_FUNC), 3);
 	PUSH_DATA (push, nvgl_comparison_op(ctx->Stencil.Function[0]));
-	PUSH_DATA (push, ctx->Stencil.Ref[0]);
+	PUSH_DATA (push, _mesa_get_stencil_ref(ctx, 0));
 	PUSH_DATA (push, ctx->Stencil.ValueMask[0]);
 }

--- a/src/mesa/drivers/dri/r200/r200_state.c
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -41,6 +41,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/light.h"
 #include "main/framebuffer.h"
 #include "main/fbobject.h"
+#include "main/stencil.h"

 #include "swrast/swrast.h"
 #include "vbo/vbo.h"
@@ -1383,7 +1384,7 @@ r200StencilFuncSeparate( struct gl_context *ctx, GLenum face, GLenum func,
                         GLint ref, GLuint mask )
 {
   r200ContextPtr rmesa = R200_CONTEXT(ctx);
-   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R200_STENCIL_REF_SHIFT) |
+   GLuint refmask = ((_mesa_get_stencil_ref(ctx, 0) << R200_STENCIL_REF_SHIFT) |
 		     ((ctx->Stencil.ValueMask[0] & 0xff) << R200_STENCIL_MASK_SHIFT));

   R200_STATECHANGE( rmesa, ctx );
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -598,6 +598,7 @@ radeon_image_target_renderbuffer_storage(struct gl_context *ctx,
   rb->Format = image->format;
   rb->_BaseFormat = _mesa_base_fbo_format(&radeon->glCtx,
                                           image->internal_format);
+   rb->NeedsFinishRenderTexture = GL_TRUE;
 }

 /**
@@ -836,25 +837,11 @@ radeon_render_texture(struct gl_context * ctx,
   if (!radeon_image->mt) {
      /* Fallback on drawing to a texture without a miptree.
       */
-      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
      _swrast_render_texture(ctx, fb, att);
      return;
   }
-   else if (!rrb) {
-      rrb = radeon_wrap_texture(ctx, newImage);
-      if (rrb) {
-         /* bind the wrapper to the attachment point */
-         _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base.Base);
-      }
-      else {
-         /* fallback to software rendering */
-         _swrast_render_texture(ctx, fb, att);
-         return;
-      }
-   }

   if (!radeon_update_wrapper(ctx, rrb, newImage)) {
-       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
       _swrast_render_texture(ctx, fb, att);
       return;
   }
@@ -898,10 +885,11 @@ radeon_finish_render_texture(struct gl_context * ctx,
                            struct gl_renderbuffer_attachment *att)
 {
    struct gl_texture_object *tex_obj = att->Texture;
-    struct gl_texture_image *image =
-	tex_obj->Image[att->CubeMapFace][att->TextureLevel];
-    radeon_texture_image *radeon_image = (radeon_texture_image *)image;
-    
+    radeon_texture_image *radeon_image = NULL;
+
+    if (tex_obj)
+        radeon_image = (radeon_texture_image *)_mesa_get_attachment_teximage(att);
+
    if (radeon_image)
 	radeon_image->used_as_render_target = GL_FALSE;

--- a/src/mesa/drivers/dri/radeon/radeon_state.c
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -42,6 +42,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "main/fbobject.h"
 #include "main/simple_list.h"
 #include "main/state.h"
+#include "main/core.h"
+#include "main/stencil.h"

 #include "vbo/vbo.h"
 #include "tnl/tnl.h"
@@ -1155,7 +1157,7 @@ radeonStencilFuncSeparate( struct gl_context *ctx, GLenum face, GLenum func,
                           GLint ref, GLuint mask )
 {
   r100ContextPtr rmesa = R100_CONTEXT(ctx);
-   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) |
+   GLuint refmask = ((_mesa_get_stencil_ref(ctx, 0) << RADEON_STENCIL_REF_SHIFT) |
 		     ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT));

   RADEON_STATECHANGE( rmesa, ctx );
--- a/src/mesa/drivers/x11/fakeglx.c
+++ b/src/mesa/drivers/x11/fakeglx.c
@@ -1532,16 +1532,18 @@ void _kw_ungrab_all( Display *dpy )
 static void
 Fake_glXDestroyContext( Display *dpy, GLXContext ctx )
 {
-   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
-   (void) dpy;
-   MakeCurrent_PrevContext = 0;
-   MakeCurrent_PrevDrawable = 0;
-   MakeCurrent_PrevReadable = 0;
-   MakeCurrent_PrevDrawBuffer = 0;
-   MakeCurrent_PrevReadBuffer = 0;
-   XMesaDestroyContext( glxCtx->xmesaContext );
-   XMesaGarbageCollect(dpy);
-   free(glxCtx);
+   if (ctx) {
+      struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
+      (void) dpy;
+      MakeCurrent_PrevContext = 0;
+      MakeCurrent_PrevDrawable = 0;
+      MakeCurrent_PrevReadable = 0;
+      MakeCurrent_PrevDrawBuffer = 0;
+      MakeCurrent_PrevReadBuffer = 0;
+      XMesaDestroyContext( glxCtx->xmesaContext );
+      XMesaGarbageCollect(dpy);
+      free(glxCtx);
+   }
 }


@@ -1550,7 +1552,7 @@ Fake_glXIsDirect( Display *dpy, GLXContext ctx )
 {
   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
   (void) dpy;
-   return glxCtx->xmesaContext->direct;
+   return glxCtx ? glxCtx->xmesaContext->direct : False;
 }


--- a/src/mesa/main/api_arrayelt.c
+++ b/src/mesa/main/api_arrayelt.c
@@ -1636,7 +1636,7 @@ void GLAPIENTRY _ae_ArrayElement( GLint elt )
   /* If PrimitiveRestart is enabled and the index is the RestartIndex
    * then we call PrimitiveRestartNV and return.
    */
-   if (ctx->Array._PrimitiveRestart && (elt == ctx->Array._RestartIndex)) {
+   if (ctx->Array.PrimitiveRestart && (elt == ctx->Array.RestartIndex)) {
      CALL_PrimitiveRestartNV((struct _glapi_table *)disp, ());
      return;
   }
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1391,7 +1391,6 @@ copy_array_attrib(struct gl_context *ctx,
   dest->PrimitiveRestartFixedIndex = src->PrimitiveRestartFixedIndex;
   dest->_PrimitiveRestart = src->_PrimitiveRestart;
   dest->RestartIndex = src->RestartIndex;
-   dest->_RestartIndex = src->_RestartIndex;
   /* skip NewState */
   /* skip RebindArrays */

--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -619,13 +619,10 @@ _mesa_init_buffer_objects( struct gl_context *ctx )
   _mesa_reference_buffer_object(ctx, &ctx->CopyWriteBuffer,
                                 ctx->Shared->NullBufferObj);

-   ctx->UniformBufferBindings = calloc(ctx->Const.MaxUniformBufferBindings,
-				      sizeof(*ctx->UniformBufferBindings));
-
   _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer,
 				 ctx->Shared->NullBufferObj);

-   for (i = 0; i < ctx->Const.MaxUniformBufferBindings; i++) {
+   for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
      _mesa_reference_buffer_object(ctx,
 				    &ctx->UniformBufferBindings[i].BufferObject,
 				    ctx->Shared->NullBufferObj);
@@ -647,14 +644,11 @@ _mesa_free_buffer_objects( struct gl_context *ctx )

   _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL);

-   for (i = 0; i < ctx->Const.MaxUniformBufferBindings; i++) {
+   for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) {
      _mesa_reference_buffer_object(ctx,
 				    &ctx->UniformBufferBindings[i].BufferObject,
 				    NULL);
   }
-
-   free(ctx->UniformBufferBindings);
-   ctx->UniformBufferBindings = NULL;
 }

 static bool
--- a/src/mesa/main/compiler.h
+++ b/src/mesa/main/compiler.h
@@ -316,7 +316,7 @@ static INLINE GLuint CPU_TO_LE32(GLuint x)
    defined(__arm__) || \
    defined(__sh__) || defined(__m32r__) || \
    (defined(__sun) && defined(_IEEE_754)) || \
-    (defined(__alpha__) && defined(__IEEE_FLOAT))
+    defined(__alpha__)
 #define USE_IEEE
 #define IEEE_ONE 0x3f800000
 #endif
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -170,6 +170,9 @@
 /*@{*/
 #define MAX_PROGRAM_LOCAL_PARAMS       4096
 #define MAX_UNIFORMS                   4096
+#define MAX_UNIFORM_BUFFERS            15 /* + 1 default uniform buffer */
+/* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. */
+#define MAX_COMBINED_UNIFORM_BUFFERS   (MAX_UNIFORM_BUFFERS * 6)
 /*@}*/

 /**
--- a/src/mesa/main/enable.c
+++ b/src/mesa/main/enable.c
@@ -53,11 +53,6 @@ update_derived_primitive_restart_state(struct gl_context *ctx)
 {
   /* Update derived primitive restart state.
    */
-   if (ctx->Array.PrimitiveRestart)
-      ctx->Array._RestartIndex = ctx->Array.RestartIndex;
-   else
-      ctx->Array._RestartIndex = ~0;
-
   ctx->Array._PrimitiveRestart = ctx->Array.PrimitiveRestart
      || ctx->Array.PrimitiveRestartFixedIndex;
 }
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -322,12 +322,14 @@ void
 _mesa_remove_attachment(struct gl_context *ctx,
                        struct gl_renderbuffer_attachment *att)
 {
+   struct gl_renderbuffer *rb = att->Renderbuffer;
+
+   /* tell driver that we're done rendering to this texture. */
+   if (rb && rb->NeedsFinishRenderTexture)
+      ctx->Driver.FinishRenderTexture(ctx, att);
+
   if (att->Type == GL_TEXTURE) {
      ASSERT(att->Texture);
-      if (ctx->Driver.FinishRenderTexture) {
-         /* tell driver that we're done rendering to this texture. */
-         ctx->Driver.FinishRenderTexture(ctx, att);
-      }
      _mesa_reference_texobj(&att->Texture, NULL); /* unbind */
      ASSERT(!att->Texture);
   }
@@ -340,6 +342,49 @@ _mesa_remove_attachment(struct gl_context *ctx,
   att->Complete = GL_TRUE;
 }

+/**
+ * Create a renderbuffer which will be set up by the driver to wrap the
+ * texture image slice.
+ *
+ * By using a gl_renderbuffer (like user-allocated renderbuffers), drivers get
+ * to share most of their framebuffer rendering code between winsys,
+ * renderbuffer, and texture attachments.
+ *
+ * The allocated renderbuffer uses a non-zero Name so that drivers can check
+ * it for determining vertical orientation, but we use ~0 to make it fairly
+ * unambiguous with actual user (non-texture) renderbuffers.
+ */
+void
+_mesa_update_texture_renderbuffer(struct gl_context *ctx,
+                                  struct gl_framebuffer *fb,
+                                  struct gl_renderbuffer_attachment *att)
+{
+   struct gl_texture_image *texImage;
+   struct gl_renderbuffer *rb;
+
+   texImage = _mesa_get_attachment_teximage(att);
+   if (!texImage)
+      return;
+
+   rb = att->Renderbuffer;
+   if (!rb) {
+      rb = ctx->Driver.NewRenderbuffer(ctx, ~0);
+      if (!rb) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture()");
+         return;
+      }
+      _mesa_reference_renderbuffer(&att->Renderbuffer, rb);
+
+      /* This can't get called on a texture renderbuffer, so set it to NULL
+       * for clarity compared to user renderbuffers.
+       */
+      rb->AllocStorage = NULL;
+
+      rb->NeedsFinishRenderTexture = ctx->Driver.FinishRenderTexture != NULL;
+   }
+
+   ctx->Driver.RenderTexture(ctx, fb, att);
+}

 /**
 * Bind a texture object to an attachment point.
@@ -352,21 +397,23 @@ _mesa_set_texture_attachment(struct gl_context *ctx,
                             struct gl_texture_object *texObj,
                             GLenum texTarget, GLuint level, GLuint zoffset)
 {
+   struct gl_renderbuffer *rb = att->Renderbuffer;
+
+   if (rb && rb->NeedsFinishRenderTexture)
+      ctx->Driver.FinishRenderTexture(ctx, att);
+
   if (att->Texture == texObj) {
      /* re-attaching same texture */
      ASSERT(att->Type == GL_TEXTURE);
-      if (ctx->Driver.FinishRenderTexture)
-	 ctx->Driver.FinishRenderTexture(ctx, att);
   }
   else {
      /* new attachment */
-      if (ctx->Driver.FinishRenderTexture && att->Texture)
-	 ctx->Driver.FinishRenderTexture(ctx, att);
      _mesa_remove_attachment(ctx, att);
      att->Type = GL_TEXTURE;
      assert(!att->Texture);
      _mesa_reference_texobj(&att->Texture, texObj);
   }
+   invalidate_framebuffer(fb);

   /* always update these fields */
   att->TextureLevel = level;
@@ -374,11 +421,7 @@ _mesa_set_texture_attachment(struct gl_context *ctx,
   att->Zoffset = zoffset;
   att->Complete = GL_FALSE;

-   if (_mesa_get_attachment_teximage(att)) {
-      ctx->Driver.RenderTexture(ctx, fb, att);
-   }
-
-   invalidate_framebuffer(fb);
+   _mesa_update_texture_renderbuffer(ctx, fb, att);
 }


@@ -1740,7 +1783,8 @@ check_end_texture_render(struct gl_context *ctx, struct gl_framebuffer *fb)
      GLuint i;
      for (i = 0; i < BUFFER_COUNT; i++) {
         struct gl_renderbuffer_attachment *att = fb->Attachment + i;
-         if (att->Texture && att->Renderbuffer) {
+         struct gl_renderbuffer *rb = att->Renderbuffer;
+         if (rb && rb->NeedsFinishRenderTexture) {
            ctx->Driver.FinishRenderTexture(ctx, att);
         }
      }
--- a/src/mesa/main/fbobject.h
+++ b/src/mesa/main/fbobject.h
@@ -105,6 +105,11 @@ _mesa_set_renderbuffer_attachment(struct gl_context *ctx,
                                  struct gl_renderbuffer_attachment *att,
                                  struct gl_renderbuffer *rb);

+void
+_mesa_update_texture_renderbuffer(struct gl_context *ctx,
+                                  struct gl_framebuffer *fb,
+                                  struct gl_renderbuffer_attachment *att);
+
 extern void
 _mesa_framebuffer_renderbuffer(struct gl_context *ctx,
                               struct gl_framebuffer *fb,
--- a/src/mesa/main/framebuffer.c
+++ b/src/mesa/main/framebuffer.c
@@ -871,18 +871,29 @@ _mesa_dest_buffer_exists(struct gl_context *ctx, GLenum format)
 GLenum
 _mesa_get_color_read_format(struct gl_context *ctx)
 {
-   const GLenum data_type = _mesa_get_format_datatype(
-                               ctx->ReadBuffer->_ColorReadBuffer->Format);
+   if (!ctx->ReadBuffer || !ctx->ReadBuffer->_ColorReadBuffer) {
+      /* The spec is unclear how to handle this case, but NVIDIA's
+       * driver generates GL_INVALID_OPERATION.
+       */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT: "
+                  "no GL_READ_BUFFER)");
+      return GL_NONE;
+   }
+   else {
+      const GLenum format = ctx->ReadBuffer->_ColorReadBuffer->Format;
+      const GLenum data_type = _mesa_get_format_datatype(format);

-   switch (ctx->ReadBuffer->_ColorReadBuffer->Format) {
-   case MESA_FORMAT_ARGB8888:
-      return GL_BGRA;
-   case MESA_FORMAT_RGB565:
-      return GL_BGR;
-   default:
-      if (data_type == GL_UNSIGNED_INT || data_type == GL_INT) {
+      if (format == MESA_FORMAT_ARGB8888)
+         return GL_BGRA;
+      else if (format == MESA_FORMAT_RGB565)
+         return GL_BGR;
+
+      switch (data_type) {
+      case GL_UNSIGNED_INT:
+      case GL_INT:
         return GL_RGBA_INTEGER;
-      } else {
+      default:
         return GL_RGBA;
      }
   }
@@ -895,26 +906,33 @@ _mesa_get_color_read_format(struct gl_context *ctx)
 GLenum
 _mesa_get_color_read_type(struct gl_context *ctx)
 {
-   const GLenum data_type = _mesa_get_format_datatype(
-                               ctx->ReadBuffer->_ColorReadBuffer->Format);
-
-   switch (ctx->ReadBuffer->_ColorReadBuffer->Format) {
-   case MESA_FORMAT_RGB565:
-      return GL_UNSIGNED_SHORT_5_6_5_REV;
-   default:
-      break;
+   if (!ctx->ReadBuffer || !ctx->ReadBuffer->_ColorReadBuffer) {
+      /* The spec is unclear how to handle this case, but NVIDIA's
+       * driver generates GL_INVALID_OPERATION.
+       */
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE: "
+                  "no GL_READ_BUFFER)");
+      return GL_NONE;
   }
+   else {
+      const GLenum format = ctx->ReadBuffer->_ColorReadBuffer->Format;
+      const GLenum data_type = _mesa_get_format_datatype(format);

-   switch (data_type) {
-   case GL_SIGNED_NORMALIZED:
-      return GL_BYTE;
-   case GL_UNSIGNED_INT:
-   case GL_INT:
-   case GL_FLOAT:
-      return data_type;
-   case GL_UNSIGNED_NORMALIZED:
-   default:
-      return GL_UNSIGNED_BYTE;
+      if (format == MESA_FORMAT_RGB565)
+         return GL_UNSIGNED_SHORT_5_6_5_REV;
+
+      switch (data_type) {
+      case GL_SIGNED_NORMALIZED:
+         return GL_BYTE;
+      case GL_UNSIGNED_INT:
+      case GL_INT:
+      case GL_FLOAT:
+         return data_type;
+      case GL_UNSIGNED_NORMALIZED:
+      default:
+         return GL_UNSIGNED_BYTE;
+      }
   }
 }

--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -35,6 +35,7 @@
 #include "texcompress.h"
 #include "framebuffer.h"
 #include "samplerobj.h"
+#include "stencil.h"

 /* This is a table driven implemetation of the glGet*v() functions.
 * The basic idea is that most getters just look up an int somewhere
@@ -660,7 +661,10 @@ find_custom_value(struct gl_context *ctx, const struct value_desc *d, union valu
      v->value_enum = ctx->Stencil.ZPassFunc[ctx->Stencil.ActiveFace];
      break;
   case GL_STENCIL_REF:
-      v->value_int = ctx->Stencil.Ref[ctx->Stencil.ActiveFace];
+      v->value_int = _mesa_get_stencil_ref(ctx, ctx->Stencil.ActiveFace);
+      break;
+   case GL_STENCIL_BACK_REF:
+      v->value_int = _mesa_get_stencil_ref(ctx, 1);
      break;
   case GL_STENCIL_VALUE_MASK:
      v->value_int = ctx->Stencil.ValueMask[ctx->Stencil.ActiveFace];
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -286,7 +286,7 @@ descriptor=[
  [ "STENCIL_BACK_FUNC", "CONTEXT_ENUM(Stencil.Function[1]), NO_EXTRA" ],
  [ "STENCIL_BACK_VALUE_MASK", "CONTEXT_INT(Stencil.ValueMask[1]), NO_EXTRA" ],
  [ "STENCIL_BACK_WRITEMASK", "CONTEXT_INT(Stencil.WriteMask[1]), NO_EXTRA" ],
-  [ "STENCIL_BACK_REF", "CONTEXT_INT(Stencil.Ref[1]), NO_EXTRA" ],
+  [ "STENCIL_BACK_REF", "LOC_CUSTOM, TYPE_INT, NO_OFFSET, NO_EXTRA" ],
  [ "STENCIL_BACK_FAIL", "CONTEXT_ENUM(Stencil.FailFunc[1]), NO_EXTRA" ],
  [ "STENCIL_BACK_PASS_DEPTH_FAIL", "CONTEXT_ENUM(Stencil.ZFailFunc[1]), NO_EXTRA" ],
  [ "STENCIL_BACK_PASS_DEPTH_PASS", "CONTEXT_ENUM(Stencil.ZPassFunc[1]), NO_EXTRA" ],
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1640,15 +1640,13 @@ struct gl_array_attrib
    * \name Primitive restart controls
    *
    * Primitive restart is enabled if either \c PrimitiveRestart or
-    * \c PrimitiveRestart is set.  If \c PrimitiveRestart is set, then
-    * \c RestartIndex is used as the cut vertex.  Otherwise ~0 is used.
+    * \c PrimitiveRestartFixedIndex is set.
    */
   /*@{*/
   GLboolean PrimitiveRestart;
   GLboolean PrimitiveRestartFixedIndex;
   GLboolean _PrimitiveRestart;
   GLuint RestartIndex;
-   GLuint _RestartIndex;
   /*@}*/

   /* GL_ARB_vertex_buffer_object */
@@ -2669,6 +2667,15 @@ struct gl_renderbuffer
   GLuint Width, Height;
   GLboolean Purgeable;  /**< Is the buffer purgeable under memory pressure? */
   GLboolean AttachedAnytime; /**< TRUE if it was attached to a framebuffer */
+   /**
+    * True for renderbuffers that wrap textures, giving the driver a chance to
+    * flush render caches through the FinishRenderTexture hook.
+    *
+    * Drivers may also set this on renderbuffers other than those generated by
+    * glFramebufferTexture(), though it means FinishRenderTexture() would be
+    * called without a rb->TexImage.
+    */
+   GLboolean NeedsFinishRenderTexture;
   GLubyte NumSamples;
   GLenum InternalFormat; /**< The user-specified format */
   GLenum _BaseFormat;    /**< Either GL_RGB, GL_RGBA, GL_DEPTH_COMPONENT or
@@ -3616,7 +3623,8 @@ struct gl_context
    * associated with uniform blocks by glUniformBlockBinding()'s state in the
    * shader program.
    */
-   struct gl_uniform_buffer_binding *UniformBufferBindings;
+   struct gl_uniform_buffer_binding
+      UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS];

   /*@}*/

--- a/src/mesa/main/stencil.c
+++ b/src/mesa/main/stencil.c
@@ -135,7 +135,6 @@ void GLAPIENTRY
 _mesa_StencilFuncSeparateATI( GLenum frontfunc, GLenum backfunc, GLint ref, GLuint mask )
 {
   GET_CURRENT_CONTEXT(ctx);
-   const GLint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;

   if (MESA_VERBOSE & VERBOSE_API)
      _mesa_debug(ctx, "glStencilFuncSeparateATI()\n");
@@ -151,8 +150,6 @@ _mesa_StencilFuncSeparateATI( GLenum frontfunc, GLenum backfunc, GLint ref, GLui
      return;
   }

-   ref = CLAMP( ref, 0, stencilMax );
-
   /* set both front and back state */
   if (ctx->Stencil.Function[0] == frontfunc &&
       ctx->Stencil.Function[1] == backfunc &&
@@ -192,7 +189,6 @@ void GLAPIENTRY
 _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask )
 {
   GET_CURRENT_CONTEXT(ctx);
-   const GLint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
   const GLint face = ctx->Stencil.ActiveFace;

   if (MESA_VERBOSE & VERBOSE_API)
@@ -203,8 +199,6 @@ _mesa_StencilFunc( GLenum func, GLint ref, GLuint mask )
      return;
   }

-   ref = CLAMP( ref, 0, stencilMax );
-
   if (face != 0) {
      if (ctx->Stencil.Function[face] == func &&
          ctx->Stencil.ValueMask[face] == mask &&
@@ -467,7 +461,6 @@ void GLAPIENTRY
 _mesa_StencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask)
 {
   GET_CURRENT_CONTEXT(ctx);
-   const GLint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;

   if (MESA_VERBOSE & VERBOSE_API)
      _mesa_debug(ctx, "glStencilFuncSeparate()\n");
@@ -481,8 +474,6 @@ _mesa_StencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask)
      return;
   }

-   ref = CLAMP(ref, 0, stencilMax);
-
   FLUSH_VERTICES(ctx, _NEW_STENCIL);

   if (face != GL_BACK) {
--- a/src/mesa/main/stencil.h
+++ b/src/mesa/main/stencil.h
@@ -78,4 +78,18 @@ _mesa_update_stencil(struct gl_context *ctx);
 extern void 
 _mesa_init_stencil( struct gl_context * ctx );

+/* From the GL 4.3 spec, 17.3.5:
+ *    "Stencil comparison operations and queries of <ref> clamp its value
+ *    to the range [0, 2^s-1], where <s> is the number of bits in the
+ *    stencil buffer attached to the draw framebuffer."
+ */
+
+static inline GLint
+_mesa_get_stencil_ref(struct gl_context const *ctx, int face)
+{
+   GLint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
+   GLint ref = ctx->Stencil.Ref[face];
+   return CLAMP(ref, 0, stencilMax);
+}
+
 #endif
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -313,15 +313,17 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
      case GL_SRGB_EXT:
      case GL_SRGB8_EXT:
      case GL_COMPRESSED_SRGB_EXT:
-      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
         return GL_RGB;
+      case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGB : -1;
      case GL_SRGB_ALPHA_EXT:
      case GL_SRGB8_ALPHA8_EXT:
      case GL_COMPRESSED_SRGB_ALPHA_EXT:
+         return GL_RGBA;
      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
      case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
-         return GL_RGBA;
+         return ctx->Extensions.EXT_texture_compression_s3tc ? GL_RGBA : -1;
      case GL_SLUMINANCE_ALPHA_EXT:
      case GL_SLUMINANCE8_ALPHA8_EXT:
      case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
@@ -1478,13 +1480,13 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
   }

   /* check xoffset and width */
-   if (xoffset < -destImage->Border) {
+   if (xoffset < - (GLint) destImage->Border) {
      _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(xoffset)",
                  function, dims);
      return GL_TRUE;
   }

-   if (xoffset + subWidth > destImage->Width) {
+   if (xoffset + subWidth > (GLint) destImage->Width) {
      _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(xoffset+width)",
                  function, dims);
      return GL_TRUE;
@@ -1498,7 +1500,7 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
                     function, dims);
         return GL_TRUE;
      }
-      if (yoffset + subHeight > destImage->Height) {
+      if (yoffset + subHeight > (GLint) destImage->Height) {
         _mesa_error(ctx, GL_INVALID_VALUE, "%s%dD(yoffset+height)",
                     function, dims);
         return GL_TRUE;
@@ -1536,14 +1538,20 @@ error_check_subtexture_dimensions(struct gl_context *ctx,
         return GL_TRUE;
      }

-      /* size must be multiple of bw by bh or equal to whole texture size */
-      if ((subWidth % bw != 0) && subWidth != destImage->Width) {
+      /* The size must be a multiple of bw x bh, or we must be using an
+       * offset+size that exactly hits the edge of the image.  This
+       * is important for small mipmap levels (1x1, 2x1, etc) and for
+       * NPOT textures.
+       */
+      if ((subWidth % bw != 0) &&
+          (xoffset + subWidth != (GLint) destImage->Width)) {
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "%s%dD(width = %d)", function, dims, subWidth);
         return GL_TRUE;
      }

-      if ((subHeight % bh != 0) && subHeight != destImage->Height) {
+      if ((subHeight % bh != 0) &&
+          (yoffset + subHeight != (GLint) destImage->Height)) {
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "%s%dD(height = %d)", function, dims, subHeight);
         return GL_TRUE;
@@ -2696,8 +2704,7 @@ check_rtt_cb(GLuint key, void *data, void *userData)
             att->TextureLevel == level &&
             att->CubeMapFace == face) {
            ASSERT(_mesa_get_attachment_teximage(att));
-            /* Tell driver about the new renderbuffer texture */
-            ctx->Driver.RenderTexture(ctx, ctx->DrawBuffer, att);
+            _mesa_update_texture_renderbuffer(ctx, ctx->DrawBuffer, att);
            /* Mark fb status as indeterminate to force re-validation */
            fb->_Status = 0;
         }
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -1112,10 +1112,9 @@ _mesa_PrimitiveRestartIndex(GLuint index)
      return;
   }

-   ctx->Array.RestartIndex = index;
-   if (ctx->Array.PrimitiveRestart && ctx->Array._RestartIndex != index) {
+   if (ctx->Array.RestartIndex != index) {
      FLUSH_VERTICES(ctx, _NEW_TRANSFORM);
-      ctx->Array._RestartIndex = index;
+      ctx->Array.RestartIndex = index;
   }
 }

@@ -1153,6 +1152,30 @@ _mesa_VertexAttribDivisor(GLuint index, GLuint divisor)
 }


+unsigned
+_mesa_primitive_restart_index(const struct gl_context *ctx, GLenum ib_type)
+{
+   /* Restart index for ib_type.  From the OpenGL 4.3 core spec, page 302:
+    * "If both PRIMITIVE_RESTART and PRIMITIVE_RESTART_FIXED_INDEX are
+    *  enabled, the index value determined by PRIMITIVE_RESTART_FIXED_INDEX
+    *  is used."
+    */
+   if (ctx->Array.PrimitiveRestartFixedIndex) {
+      switch (ib_type) {
+      case GL_UNSIGNED_BYTE:
+         return 0xff;
+      case GL_UNSIGNED_SHORT:
+         return 0xffff;
+      case GL_UNSIGNED_INT:
+         return 0xffffffff;
+      default:
+         assert(!"_mesa_primitive_restart_index: Invalid index buffer type.");
+      }
+   }
+   /* Fixed index off (or bad ib_type with asserts compiled out). */
+   return ctx->Array.RestartIndex;
+}
+

 /**
 * Copy one client vertex array to another.
--- a/src/mesa/main/varray.h
+++ b/src/mesa/main/varray.h
@@ -248,6 +248,8 @@ _mesa_PrimitiveRestartIndex(GLuint index);
 extern void GLAPIENTRY
 _mesa_VertexAttribDivisor(GLuint index, GLuint divisor);

+extern unsigned
+_mesa_primitive_restart_index(const struct gl_context *ctx, GLenum ib_type);

 extern void
 _mesa_copy_client_array(struct gl_context *ctx,
--- a/src/mesa/main/version.h
+++ b/src/mesa/main/version.h
@@ -34,8 +34,8 @@ struct gl_context;
 /* Mesa version */
 #define MESA_MAJOR 9
 #define MESA_MINOR 1
-#define MESA_PATCH 3
-#define MESA_VERSION_STRING "9.1.3"
+#define MESA_PATCH 4
+#define MESA_VERSION_STRING "9.1.4"

 /* To make version comparison easy */
 #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2744,6 +2744,8 @@ ir_to_mesa_visitor::copy_propagate(void)
      /* If this is a copy, add it to the ACP. */
      if (inst->op == OPCODE_MOV &&
 	  inst->dst.file == PROGRAM_TEMPORARY &&
+	  !(inst->dst.file == inst->src[0].file &&
+	    inst->dst.index == inst->src[0].index) &&
 	  !inst->dst.reladdr &&
 	  !inst->saturate &&
 	  !inst->src[0].reladdr &&
--- a/src/mesa/state_tracker/st_atom_constbuf.c
+++ b/src/mesa/state_tracker/st_atom_constbuf.c
@@ -192,13 +192,25 @@ static void st_bind_ubos(struct st_context *st,

      binding = &st->ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      st_obj = st_buffer_object(binding->BufferObject);
-      pipe_resource_reference(&cb.buffer, st_obj->buffer);

-      cb.buffer_size = st_obj->buffer->width0 - binding->Offset;
-      cb.buffer_offset = binding->Offset;
+      cb.buffer = st_obj->buffer;
+
+      if (cb.buffer) {
+         cb.buffer_offset = binding->Offset;
+         cb.buffer_size = cb.buffer->width0 - binding->Offset;
+
+         /* AutomaticSize is FALSE if the buffer was set with BindBufferRange.
+          * Take the minimum just to be sure.
+          */
+         if (!binding->AutomaticSize)
+            cb.buffer_size = MIN2(cb.buffer_size, binding->Size);
+      }
+      else {
+         cb.buffer_offset = 0;
+         cb.buffer_size = 0;
+      }

      st->pipe->set_constant_buffer(st->pipe, shader_type, 1 + i, &cb);
-      pipe_resource_reference(&cb.buffer, NULL);
   }
 }

--- a/src/mesa/state_tracker/st_atom_depth.c
+++ b/src/mesa/state_tracker/st_atom_depth.c
@@ -41,6 +41,9 @@
 #include "pipe/p_defines.h"
 #include "cso_cache/cso_context.h"

+#include "main/core.h"
+#include "main/stencil.h"
+

 /**
 * Convert an OpenGL compare mode to a pipe tokens.
@@ -116,7 +119,7 @@ update_depth_stencil_alpha(struct st_context *st)
      dsa->stencil[0].zpass_op = gl_stencil_op_to_pipe(ctx->Stencil.ZPassFunc[0]);
      dsa->stencil[0].valuemask = ctx->Stencil.ValueMask[0] & 0xff;
      dsa->stencil[0].writemask = ctx->Stencil.WriteMask[0] & 0xff;
-      sr.ref_value[0] = ctx->Stencil.Ref[0] & 0xff;
+      sr.ref_value[0] = _mesa_get_stencil_ref(ctx, 0);

      if (ctx->Stencil._TestTwoSide) {
         const GLuint back = ctx->Stencil._BackFace;
@@ -127,7 +130,7 @@ update_depth_stencil_alpha(struct st_context *st)
         dsa->stencil[1].zpass_op = gl_stencil_op_to_pipe(ctx->Stencil.ZPassFunc[back]);
         dsa->stencil[1].valuemask = ctx->Stencil.ValueMask[back] & 0xff;
         dsa->stencil[1].writemask = ctx->Stencil.WriteMask[back] & 0xff;
-         sr.ref_value[1] = ctx->Stencil.Ref[back] & 0xff;
+         sr.ref_value[1] = _mesa_get_stencil_ref(ctx, back);
      }
      else {
         /* This should be unnecessary. Drivers must not expect this to
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -388,8 +388,8 @@ st_render_texture(struct gl_context *ctx,
 {
   struct st_context *st = st_context(ctx);
   struct pipe_context *pipe = st->pipe;
-   struct st_renderbuffer *strb;
-   struct gl_renderbuffer *rb;
+   struct gl_renderbuffer *rb = att->Renderbuffer;
+   struct st_renderbuffer *strb = st_renderbuffer(rb);
   struct pipe_resource *pt;
   struct st_texture_object *stObj;
   const struct gl_texture_image *texImage;
@@ -404,24 +404,6 @@ st_render_texture(struct gl_context *ctx,
   /* get pointer to texture image we're rendeing to */
   texImage = _mesa_get_attachment_teximage(att);

-   /* create new renderbuffer which wraps the texture image.
-    * Use the texture's name as the renderbuffer's name so that we have
-    * something that's non-zero (to determine vertical orientation) and
-    * possibly helpful for debugging.
-    */
-   rb = st_new_renderbuffer(ctx, att->Texture->Name);
-   if (!rb) {
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture()");
-      return;
-   }
-
-   _mesa_reference_renderbuffer(&att->Renderbuffer, rb);
-   assert(rb->RefCount == 1);
-   rb->AllocStorage = NULL; /* should not get called */
-   strb = st_renderbuffer(rb);
-
-   assert(strb->Base.RefCount > 0);
-
   /* get the texture for the texture object */
   stObj = st_texture_object(att->Texture);

--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -231,7 +231,7 @@ st_draw_vbo(struct gl_context *ctx,
                                   nr_prims);

      if (!setup_index_buffer(st, ib, &ibuffer)) {
-         /* out of memory */
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glBegin/DrawElements/DrawArray");
         return;
      }

@@ -245,7 +245,7 @@ st_draw_vbo(struct gl_context *ctx,
       * so we only set these fields for indexed drawing:
       */
      info.primitive_restart = ctx->Array._PrimitiveRestart;
-      info.restart_index = ctx->Array._RestartIndex;
+      info.restart_index = ctx->Array.RestartIndex;
   }
   else {
      /* Transform feedback drawing is always non-indexed. */
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -71,7 +71,6 @@ void st_init_limits(struct st_context *st)
   struct gl_constants *c = &st->ctx->Const;
   gl_shader_type sh;
   boolean can_ubo = TRUE;
-   int max_const_buffers;

   c->MaxTextureLevels
      = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS),
@@ -118,9 +117,9 @@ void st_init_limits(struct st_context *st)
   c->MaxViewportHeight =
   c->MaxRenderbufferSize = c->MaxTextureRectSize;

-   c->MaxDrawBuffers
-      = _clamp(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS),
-              1, MAX_DRAW_BUFFERS);
+   c->MaxDrawBuffers = c->MaxColorAttachments =
+      _clamp(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS),
+             1, MAX_DRAW_BUFFERS);

   c->MaxDualSourceDrawBuffers
      = _clamp(screen->get_param(screen, PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS),
@@ -156,13 +155,16 @@ void st_init_limits(struct st_context *st)
   c->MaxTextureLodBias
      = screen->get_paramf(screen, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS);

-   c->MaxDrawBuffers
-      = CLAMP(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS),
-              1, MAX_DRAW_BUFFERS);
-
   c->QuadsFollowProvokingVertexConvention = screen->get_param(
      screen, PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION);

+   c->MaxUniformBlockSize =
+      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+                               PIPE_SHADER_CAP_MAX_CONSTS) * 16;
+   if (c->MaxUniformBlockSize < 16384) {
+      can_ubo = FALSE;
+   }
+
   for (sh = 0; sh < MESA_SHADER_TYPES; ++sh) {
      struct gl_shader_compiler_options *options =
         &st->ctx->ShaderCompilerOptions[sh];
@@ -183,17 +185,34 @@ void st_init_limits(struct st_context *st)
         continue;
      }

-      pc->MaxNativeInstructions    = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
-      pc->MaxNativeAluInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS);
-      pc->MaxNativeTexInstructions = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS);
-      pc->MaxNativeTexIndirections = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS);
-      pc->MaxNativeAttribs         = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS);
-      pc->MaxNativeTemps           = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEMPS);
-      pc->MaxNativeAddressRegs     = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ADDRS);
-      pc->MaxNativeParameters      = screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONSTS);
-      pc->MaxUniformComponents     = 4 * MIN2(pc->MaxNativeParameters, MAX_UNIFORMS);
-      /* raise MaxParameters if native support is higher */
-      pc->MaxParameters            = MAX2(pc->MaxParameters, pc->MaxNativeParameters);
+      pc->MaxInstructions    = pc->MaxNativeInstructions    =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
+      pc->MaxAluInstructions = pc->MaxNativeAluInstructions =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS);
+      pc->MaxTexInstructions = pc->MaxNativeTexInstructions =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS);
+      pc->MaxTexIndirections = pc->MaxNativeTexIndirections =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS);
+      pc->MaxAttribs         = pc->MaxNativeAttribs         =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS);
+      pc->MaxTemps           = pc->MaxNativeTemps           =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_TEMPS);
+      pc->MaxAddressRegs     = pc->MaxNativeAddressRegs     =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ADDRS);
+      pc->MaxParameters      = pc->MaxNativeParameters      =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONSTS);
+
+      pc->MaxUniformComponents = 4 * MIN2(pc->MaxNativeParameters, MAX_UNIFORMS);
+
+      pc->MaxUniformBlocks =
+         screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
+      if (pc->MaxUniformBlocks)
+         pc->MaxUniformBlocks -= 1; /* The first one is for ordinary uniforms. */
+      pc->MaxUniformBlocks = _min(pc->MaxUniformBlocks, MAX_UNIFORM_BUFFERS);
+
+      pc->MaxCombinedUniformComponents = (pc->MaxUniformComponents +
+                                          c->MaxUniformBlockSize / 4 *
+                                          pc->MaxUniformBlocks);

      /* Gallium doesn't really care about local vs. env parameters so use the
       * same limits.
@@ -220,15 +239,9 @@ void st_init_limits(struct st_context *st)
      options->EmitNoIndirectUniform = !screen->get_shader_param(screen, sh,
                                        PIPE_SHADER_CAP_INDIRECT_CONST_ADDR);

-      if (pc->MaxNativeInstructions) {
-         if (options->EmitNoIndirectUniform)
+      if (pc->MaxNativeInstructions &&
+          (options->EmitNoIndirectUniform || pc->MaxUniformBlocks < 12)) {
         can_ubo = FALSE;
-
-         max_const_buffers = screen->get_shader_param(screen, sh,
-                                                      PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
-         /* we need 13 buffers - 1 constant, 12 UBO */
-         if (max_const_buffers < 13)
-            can_ubo = FALSE;
      }

      if (options->EmitNoLoops)
@@ -238,6 +251,8 @@ void st_init_limits(struct st_context *st)
      options->LowerClipDistance = true;
   }

+   c->VertexProgram.MaxAttribs = MIN2(c->VertexProgram.MaxAttribs, 16);
+
   /* PIPE_SHADER_CAP_MAX_INPUTS for the FS specifies the maximum number
    * of inputs. It's always 2 colors + N generic inputs. */
   c->MaxVarying = screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
@@ -267,8 +282,13 @@ void st_init_limits(struct st_context *st)

   if (can_ubo) {
      st->ctx->Extensions.ARB_uniform_buffer_object = GL_TRUE;
-      st->ctx->Const.UniformBufferOffsetAlignment =
+      c->UniformBufferOffsetAlignment =
         screen->get_param(screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT);
+      c->MaxCombinedUniformBlocks = c->MaxUniformBufferBindings =
+         c->VertexProgram.MaxUniformBlocks +
+         c->GeometryProgram.MaxUniformBlocks +
+         c->FragmentProgram.MaxUniformBlocks;
+      assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS);
   }
 }

--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3506,6 +3506,8 @@ glsl_to_tgsi_visitor::copy_propagate(void)
      /* If this is a copy, add it to the ACP. */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
+          !(inst->dst.file == inst->src[0].file &&
+             inst->dst.index == inst->src[0].index) &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
--- a/src/mesa/swrast/s_stencil.c
+++ b/src/mesa/swrast/s_stencil.c
@@ -28,6 +28,8 @@
 #include "main/imports.h"
 #include "main/format_pack.h"
 #include "main/format_unpack.h"
+#include "main/core.h"
+#include "main/stencil.h"

 #include "s_context.h"
 #include "s_depth.h"
@@ -128,7 +130,7 @@ apply_stencil_op(const struct gl_context *ctx, GLenum oper, GLuint face,
                 GLuint n, GLubyte stencil[], const GLubyte mask[],
                 GLint stride)
 {
-   const GLubyte ref = ctx->Stencil.Ref[face];
+   const GLubyte ref = _mesa_get_stencil_ref(ctx, face);
   const GLubyte wrtmask = ctx->Stencil.WriteMask[face];
   const GLubyte invmask = (GLubyte) (~wrtmask);
   GLuint i, j;
@@ -215,7 +217,7 @@ do_stencil_test(struct gl_context *ctx, GLuint face, GLuint n,
   GLboolean allfail = GL_FALSE;
   GLuint i, j;
   const GLuint valueMask = ctx->Stencil.ValueMask[face];
-   const GLubyte ref = (GLubyte) (ctx->Stencil.Ref[face] & valueMask);
+   const GLubyte ref = (GLubyte) (_mesa_get_stencil_ref(ctx, face) & valueMask);
   GLubyte s;

   /*
--- a/Show More
+++ b/Show More