docs: Add 8.0.2 release notes

Signed-off-by: Jakob Bornecrantz <jakob@vmware.com>
mesa: Bump version number to 8.0.2
2012-03-21 18:01:20 +00:00 · 2012-03-21 17:55:53 +00:00 · 2012-03-21 17:55:53 +00:00 · 2012-03-21 17:55:53 +00:00 · 2012-03-20 12:20:04 -07:00 · 2012-03-20 12:19:58 -07:00
85 changed files with 2175 additions and 766 deletions
--- a/8
+++ b/8
@@ -184,7 +184,7 @@ ultrix-gcc:

 # Rules for making release tarballs

-PACKAGE_VERSION=8.0
+PACKAGE_VERSION=8.0.2
 PACKAGE_DIR = Mesa-$(PACKAGE_VERSION)
 PACKAGE_NAME = MesaLib-$(PACKAGE_VERSION)

@@ -199,6 +199,12 @@ EXTRA_FILES = \
 	src/glsl/glcpp/glcpp-lex.c			\
 	src/glsl/glcpp/glcpp-parse.c			\
 	src/glsl/glcpp/glcpp-parse.h			\
+	src/mesa/main/api_exec_es1.c			\
+	src/mesa/main/api_exec_es1_dispatch.h		\
+	src/mesa/main/api_exec_es1_remap_helper.h	\
+	src/mesa/main/api_exec_es2.c			\
+	src/mesa/main/api_exec_es2_dispatch.h		\
+	src/mesa/main/api_exec_es2_remap_helper.h	\
 	src/mesa/program/lex.yy.c			\
 	src/mesa/program/program_parse.tab.c		\
 	src/mesa/program/program_parse.tab.h
--- a/configs/darwin
+++ b/configs/darwin
@@ -9,8 +9,8 @@ INSTALL_DIR = /usr/X11
 X11_DIR = $(INSTALL_DIR)

 # Compiler and flags
-CC = gcc
-CXX = g++
+CC = $(shell xcrun -find cc)
+CXX = $(shell xcrun -find c++)
 PIC_FLAGS = -fPIC
 DEFINES =  -D_DARWIN_C_SOURCE -DPTHREADS -D_GNU_SOURCE \
 	   -DGLX_ALIAS_UNSUPPORTED \
@@ -24,11 +24,14 @@ DEFINES =  -D_DARWIN_C_SOURCE -DPTHREADS -D_GNU_SOURCE \
 # -DIN_DRI_DRIVER

 ARCH_FLAGS += $(RC_CFLAGS)
+INCLUDE_FLAGS = -I$(INSTALL_DIR)/include -I$(X11_DIR)/include
+OPT_FLAGS = -g3 -gdwarf-2 -Os -ffast-math -fno-strict-aliasing
+WARN_FLAGS = -Wall -Wmissing-prototypes

-CFLAGS =  -ggdb3 -Os -Wall -Wmissing-prototypes -std=c99 -ffast-math -fno-strict-aliasing -fvisibility=hidden \
-	-I$(INSTALL_DIR)/include -I$(X11_DIR)/include $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(ASM_FLAGS) $(DEFINES)
-CXXFLAGS =  -ggdb3 -Os -Wall -fno-strict-aliasing -fvisibility=hidden \
-	-I$(INSTALL_DIR)/include -I$(X11_DIR)/include $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(ASM_FLAGS) $(DEFINES)
+CFLAGS = -std=c99 -fvisibility=hidden \
+	$(OPT_FLAGS) $(WARN_FLAGS) $(INCLUDE_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(ASM_FLAGS) $(DEFINES) $(EXTRA_CFLAGS)
+CXXFLAGS = -fvisibility=hidden \
+	$(OPT_FLAGS) $(WARN_FLAGS) $(INCLUDE_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(ASM_FLAGS) $(DEFINES) $(EXTRA_CFLAGS)

 # Library names (actual file names)
 GL_LIB_NAME = lib$(GL_LIB).dylib
@@ -44,10 +47,10 @@ GLW_LIB_GLOB = lib$(GLW_LIB).*dylib
 OSMESA_LIB_GLOB = lib$(OSMESA_LIB).*dylib
 VG_LIB_GLOB = lib$(VG_LIB).*dylib

-GL_LIB_DEPS = -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXext -lm -lpthread
-OSMESA_LIB_DEPS =
-GLU_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB)
-GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXt
+GL_LIB_DEPS = -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11-xcb -lxcb -lX11 -lXext $(EXTRA_LDFLAGS)
+OSMESA_LIB_DEPS = $(EXTRA_LDFLAGS)
+GLU_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) $(EXTRA_LDFLAGS)
+GLW_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GL_LIB) -L$(INSTALL_DIR)/$(LIB_DIR) -L$(X11_DIR)/$(LIB_DIR) -lX11 -lXt $(EXTRA_LDFLAGS)

 SRC_DIRS = glsl mapi/glapi mapi/vgapi glx/apple mesa gallium glu
 GLU_DIRS = sgi
--- a/configs/darwin-fat-intel
+++ b/configs/darwin-fat-intel
@@ -0,0 +1,7 @@
+# Configuration for Darwin / MacOS X, making 32bit and 64bit fat dynamic libs for intel
+
+RC_CFLAGS=-arch i386 -arch x86_64
+
+include $(TOP)/configs/darwin
+
+CONFIG_NAME = darwin-fat-intel
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -52,7 +52,6 @@ a:visited {

 <b>User Topics</b>
 <ul>
-<li><a href="shading.html" target="MainFrame">Shading Language</a>
 <li><a href="egl.html" target="MainFrame">EGL</a>
 <li><a href="opengles.html" target="MainFrame">OpenGL ES</a>
 <li><a href="openvg.html" target="MainFrame">OpenVG / Vega</a>
--- a/docs/news.html
+++ b/docs/news.html
@@ -11,6 +11,32 @@
 <H1>News</H1>


+<h2>March 21, 2012</h2>
+
+<p>
+<a href="relnotes-8.0.2.html">Mesa 8.0.2</a> is released.
+This is a bug fix release.
+</p>
+
+
+<h2>February 16, 2012</h2>
+
+<p>
+<a href="relnotes-8.0.1.html">Mesa 8.0.1</a> is released.
+This is a bug fix release.
+</p>
+
+
+<h2>February 9, 2012</h2>
+
+<p>
+<a href="relnotes-8.0.html">Mesa 8.0</a> is released.
+This is the first version of Mesa to support OpenGL 3.0 and GLSL 1.30
+(with the i965 driver).
+See the release notes for more information about the release.
+</p>
+
+
 <h2>November 27, 2011</h2>

 <p>
--- a/docs/relnotes-8.0.1.html
+++ b/docs/relnotes-8.0.1.html
@@ -0,0 +1,153 @@
+<HTML>
+
+<head>
+<TITLE>Mesa Release Notes</TITLE>
+<link rel="stylesheet" type="text/css" href="mesa.css">
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+</head>
+
+<BODY>
+
+<body bgcolor="#eeeeee">
+
+<H1>Mesa 8.0.1 Release Notes / February 16, 2012</H1>
+
+<p>
+Mesa 8.0.1 is a bug fix release which fixes bugs found since the 8.0 release.
+</p>
+<p>
+Mesa 8.0.1 implements the OpenGL 3.0 API, but the version reported by
+glGetString(GL_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.0.
+</p>
+<p>
+See the <a href="install.html">Compiling/Installing page</a> for prerequisites
+for DRI hardware acceleration.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+4855c2d93bd2ebd43f384bdcc92c9a27  MesaLib-8.0.1.tar.gz
+24eeebf66971809d8f40775a379b36c9  MesaLib-8.0.1.tar.bz2
+54e745d14dac5717f7f65b4e2d5c1df2  MesaLib-8.0.1.zip
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28924">Bug 28924</a> - [ILK] piglit tex-border-1 fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40864">Bug 40864</a> - [bisected pineview] oglc pxconv-gettex(basic.allCases) fails on pineview</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=43327">Bug 43327</a> - [bisected SNB] HiZ make many oglc cases regressed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44333">Bug 44333</a> - [bisected] Color distortion with xbmc mediaplayer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44927">Bug 44927</a> - [SNB IVB regression] gl-117 abort when click</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45221">Bug 45221</a> - [bisected IVB] glean/fbo regression in stencil-only case</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45877">Bug 45877</a> - main/image.c:1597: _mesa_convert_colors: Assertion `dstType == 0x1406' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45578">Bug 45578</a> - main/image.c:1659: _mesa_convert_colors: Assertion `dstType == 0x1403' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45872">Bug 45872</a> - [bisected PNV] oglc mustpass(basic.stipple) regressed on pineview</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45876">Bug 45876</a> - [PNV]oglc texenv(basic.allCases) regressed on pineview</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45917">Bug 45917</a> - [PNV] Regression in Piglit test general/two-sided-lighting-separate-specular</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45943">Bug 45943</a> - [r300g] r300_emit.c:365:r300_emit_aa_state: Assertion `(aa-&gt;dest)-&gt;cs_buf' failed.</li>
+
+<!-- <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=">Bug </a> - </li> -->
+
+</ul>
+
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-8.0..mesa-8.0.1
+</pre>
+
+<p>Alex Deucher (2):
+<ul>
+  <li>r600g: fix tex tile_type offset for cayman</li>
+  <li>r600g: 128 bit formats require tile_type = 1 on cayman</li>
+</ul></p>
+
+<p>Anuj Phogat (2):
+<ul>
+  <li>meta: Add pixel store/pack operations in decompress_texture_image</li>
+  <li>meta: Avoid FBO resizing/reallocating in decompress_texture_image</li>
+</ul></p>
+
+<p>Brian Paul (6):
+<ul>
+  <li>docs: add news item for 8.0 release</li>
+  <li>docs: update info about supported systems, GPUs, APIs</li>
+  <li>docs: add VMware link</li>
+  <li>docs: remove link to the GLSL compiler page</li>
+  <li>mesa: fix proxy texture target initialization</li>
+  <li>swrast: fix span color type selection</li>
+</ul></p>
+
+<p>Chad Versace (2):
+<ul>
+  <li>i965: Rewrite the HiZ op</li>
+  <li>i965: Remove file i965/junk, accidentally added in 7b36c68</li>
+</ul></p>
+
+<p>Dave Airlie (1):
+<ul>
+  <li>st/mesa: only resolve if number of samples is &gt; 1</li>
+</ul></p>
+
+<p>Eric Anholt (3):
+<ul>
+  <li>i965: Fix HiZ change compiler warning.</li>
+  <li>i965: Report the failure message when failing to compile the fragment shader.</li>
+  <li>i965/fs: Enable register spilling on gen7 too.</li>
+</ul></p>
+
+<p>Ian Romanick (4):
+<ul>
+  <li>docs: Add 8.0 MD5 checksums</li>
+  <li>glapi: Include GLES2 headers for ES2 extension functions</li>
+  <li>swrast: Only avoid empty _TexEnvPrograms</li>
+  <li>mesa: Bump version number to 8.0.1</li>
+</ul></p>
+
+<p>Kenneth Graunke (4):
+<ul>
+  <li>i965: Fix border color on Ironlake.</li>
+  <li>i965/fs: Add a new fs_inst::regs_written function.</li>
+  <li>i965/fs: Take # of components into account in try_rewrite_rhs_to_dst.</li>
+  <li>i965: Emit Ivybridge VS workaround flushes.</li>
+</ul></p>
+
+<p>Mathias Fröhlich (1):
+<ul>
+  <li>state_stracker: Fix access to uninitialized memory.</li>
+</ul></p>
+
+<p>Paul Berry (1):
+<ul>
+  <li>i915: Fix type of "specoffset" variable.</li>
+</ul></p>
+
+<p>Simon Farnsworth (1):
+<ul>
+  <li>r600g: Use a fake reloc to sleep for fences</li>
+</ul></p>
+
+</body>
+</html>
--- a/docs/relnotes-8.0.2.html
+++ b/docs/relnotes-8.0.2.html
@@ -0,0 +1,158 @@
+<HTML>
+
+<head>
+<TITLE>Mesa Release Notes</TITLE>
+<link rel="stylesheet" type="text/css" href="mesa.css">
+<meta http-equiv="content-type" content="text/html; charset=utf-8" />
+</head>
+
+<BODY>
+
+<body bgcolor="#eeeeee">
+
+<H1>Mesa 8.0.2 Release Notes / March 21, 2012</H1>
+
+<p>
+Mesa 8.0.2 is a bug fix release which fixes bugs found since the 8.0.1 release.
+</p>
+<p>
+Mesa 8.0.2 implements the OpenGL 3.0 API, but the version reported by
+glGetString(GL_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.0.
+</p>
+<p>
+See the <a href="install.html">Compiling/Installing page</a> for prerequisites
+for DRI hardware acceleration.
+</p>
+
+
+<h2>MD5 checksums</h2>
+<pre>
+tbd
+</pre>
+
+<h2>New features</h2>
+<p>None.</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38720">Bug 38720</a> - [SNB] Trine triggers a GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=40059">Bug 40059</a> - [SNB] hang in "Amnesia: The Dark Descent" demo</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45216">Bug 45216</a> - [SNB] GPU hang in OilRush</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=46631">Bug 46631</a> - It's really hard to hit the fast path for the fallback glReadPixels code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=46679">Bug 46679</a> - glReadPixels on a luminance texture returns the wrong values</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=46311">Bug 46311</a> - Missing support of point size in Mesa core</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=46665">Bug 46665</a> - [PNV] webgl conformance case max texture fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=45975">Bug 45975</a> - [Gen4 + ILK] render with pointcoord will fail to render</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=46666">Bug 46666</a> - [PNV] webgl conformance case NPOT case fails with TEXTURE_MIN_FILTER set to LINEAR</li>
+
+<!-- <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=">Bug </a> - </li> -->
+
+</ul>
+
+
+<h2>Changes</h2>
+<p>The full set of changes can be viewed by using the following GIT command:</p>
+
+<pre>
+  git log mesa-8.0.1..mesa-8.0.2
+</pre>
+
+<p>Brian Paul (7):
+<ul>
+      <li>svga: add null vs pointer check in update_need_pipeline()</li>
+      <li>util: add mutex lock in u_debug_memory.c code</li>
+      <li>mesa: add _mesa_rebase_rgba_float/uint() functions</li>
+      <li>mesa: use _mesa_rebase_rgba_float/uint() in glReadPixels code</li>
+      <li>mesa: use _mesa_rebase_rgba_float/uint() in glGetTexImage code</li>
+      <li>mesa: fix GL_LUMINANCE handling in glGetTexImage</li>
+      <li>docs: add links to 8.0.1 release notes</li>
+</ul></p>
+
+<p>Daniel Vetter (1):
+<ul>
+      <li>i965: fixup W-tile offset computation to take swizzling into account</li>
+</ul></p>
+
+<p>Dylan Noblesmith (1):
+<ul>
+      <li>mesa: add back glGetnUniform*v() overflow error reporting</li>
+</ul></p>
+
+<p>Ian Romanick (1):
+<ul>
+      <li>docs: Add 8.0.1 release md5sums</li>
+</ul></p>
+
+<p>Jakob Bornecrantz (3):
+<ul>
+      <li>mesa: Include mesa ES mapi generated files</li>
+      <li>mesa: Bump version number to 8.0.2</li>
+      <li>docs: Add 8.0.2 release notes</li>
+</ul></p>
+
+<p>Jeremy Huddleston (3):
+<ul>
+      <li>darwin: config file cleanups</li>
+      <li>darwin: Build create_context.c</li>
+      <li>darwin: Link against libxcb</li>
+</ul></p>
+
+<p>José Fonseca (1):
+<ul>
+      <li>svga: Clamp advertised PIPE_SHADER_CAP_MAX_TEMPS to SVGA3D_TEMPREG_MAX.</li>
+</ul></p>
+
+<p>Kenneth Graunke (2):
+<ul>
+      <li>i965: Only set Last Render Target Select on the last FB write.</li>
+      <li>i965: Fix Gen6+ dynamic state upper bound on older kernels.</li>
+</ul></p>
+
+<p>Marek Olšák (1):
+<ul>
+      <li>gallium/rtasm: properly detect SSE and SSE2</li>
+</ul></p>
+
+<p>Neil Roberts (1):
+<ul>
+      <li>mesa: Don't disable fast path for normalized types</li>
+</ul></p>
+
+<p>Tom Stellard (1):
+<ul>
+      <li>r300/compiler: Fix bug when lowering KILP on r300 cards</li>
+</ul></p>
+
+<p>Yuanhan Liu (6):
+<ul>
+      <li>mesa: let GL3 buf obj queries not depend on opengl major version</li>
+      <li>tnl: let _TNL_ATTRIB_POINTSIZE do not depend on ctx-&gt;VertexProgram._Enabled</li>
+      <li>i915: fix wrong rendering of gl_PointSize on Pineview</li>
+      <li>i915: move the FALLBACK_DRAW_OFFSET check outside the drawing rect check</li>
+      <li>i965: handle gl_PointCoord for Gen4 and Gen5 platforms</li>
+      <li>i915: fallback for NPOT cubemap texture</li>
+</ul></p>
+
+<p>Zack Rusin (3):
+<ul>
+      <li>svga: fix a crash happening before setting fragment shaders.</li>
+      <li>svga: Fix stencil op mapping</li>
+      <li>svga: fix the rasterizer state resets</li>
+</ul></p>
+
+
+</body>
+</html>
--- a/docs/relnotes-8.0.html
+++ b/docs/relnotes-8.0.html
@@ -30,7 +30,9 @@ for DRI hardware acceleration.

 <h2>MD5 checksums</h2>
 <pre>
-tbd
+3516fea6c28ce4a0fa9759e4894729a1  MesaLib-8.0.tar.gz
+1a5668fe72651a670611164cefc703b2  MesaLib-8.0.tar.bz2
+66f5a01a85530a91472a3acceb556db8  MesaLib-8.0.zip
 </pre>


--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -13,6 +13,8 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <UL>
+<LI><A HREF="relnotes-8.0.2.html">8.0.2 release notes</A>
+<LI><A HREF="relnotes-8.0.1.html">8.0.1 release notes</A>
 <LI><A HREF="relnotes-8.0.html">8.0 release notes</A>
 <LI><A HREF="relnotes-7.11.html">7.11 release notes</A>
 <LI><A HREF="relnotes-7.10.3.html">7.10.3 release notes</A>
--- a/docs/systems.html
+++ b/docs/systems.html
@@ -9,34 +9,78 @@
 <H1>Supported Systems and Drivers</H1>

 <p>
-Mesa was originally designed for Unix/X11 systems and is still best
-supported on those systems. All you need is an ANSI C compiler and the
-X development environment to use Mesa.
+Mesa is primarily developed and used on Linux systems.
+But there's also support for Windows, other flavors of Unix and other
+systems such as Haiku.
+We're actively developing and maintaining several hardware and
+software drivers.
 </p>

 <p>
-The DRI hardware drivers for the X.org server and XFree86 provide
-hardware accelerated rendering for chips from ATI, Intel, and NVIDIA
-on Linux and FreeBSD.
+The primary API is OpenGL but there's also support for OpenGL ES 1
+and ES 2, OpenVG and the EGL interface.
 </p>

 <p>
-Drivers for other assorted platforms include:
-the Apple Macintosh and Windows.
+Hardware drivers include:
 </p>
+<ul>
+<li>Intel i965, i945, i915.
+    See <a href="http://intellinuxgraphics.org/index.html" target="_parent">
+    Intel's website</a>
+<li>AMD Radeon series
+<li>Some NVIDIA GPUs.
+<li>VMware virtual GPU
+</ul>

 <p>
-Details about particular drivers follows:
+Software drivers include:
+</p>
+<ul>
+<li><a href="llvmpipe.html">llvmpipe</a> - uses LLVM for x86 JIT code
+    generation and is multi-threaded
+<li>softpipe - a reference Gallium driver
+<li>swrast - the legacy/original Mesa software rasterizer
+</ul>
+
+<p>
+Additional driver information:
 </p>

 <UL>
 <li><a href="http://dri.freedesktop.org/" target="_parent"> DRI hardware
 drivers</a> for the X Window System
-<LI><a href="xlibdriver.html">Xlib software driver</a> for the X Window System
+<li><a href="xlibdriver.html">Xlib / swrast driver</a> for the X Window System
 and Unix-like operating systems
-<LI>Microsoft Windows <A HREF="README.WIN32">(README.WIN32)</A>
-<LI>DEC VMS <A HREF="README.VMS">(README.VMS)</A>
+<li><a href="README.WIN32">Microsoft Windows</a>
+<li><a href="vmware-guest.html">VMware</a> guest OS driver
 </UL>

+
+<h1>
+Deprecated Systems and Drivers
+</h1>
+
+<p>
+In the past there were other drivers for older GPUs and operating
+systems.
+These have been removed from the Mesa source tree and distribution.
+If anyone's interested though, the code can be found in the git repo.
+The list includes:
+</p>
+
+<ul>
+<li>3dfx/glide
+<li>Matrox
+<li>ATI R128
+<li>Savage
+<li>VIA Unichrome
+<li>SIS
+<li>3Dlabs gamma
+<li>DOS
+<li>fbdev
+<li>DEC/VMS
+</ul>
+
 </body>
 </html>
--- a/docs/vmware-guest.html
+++ b/docs/vmware-guest.html
@@ -10,7 +10,8 @@
 <h1>VMware guest GL driver</h1>

 <p>
-This page describes how to build, install and use the VMware guest GL driver
+This page describes how to build, install and use the
+<a href="http://www.vmware.com/" target="_parent">VMware</a> guest GL driver
 (aka the SVGA or SVGA3D driver) for Linux using the latest source code.
 This driver gives a Linux virtual machine access to the host's GPU for
 hardware-accelerated 3D.
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -80,6 +80,21 @@ void draw_set_viewport_state( struct draw_context *draw,
 void draw_set_clip_state( struct draw_context *pipe,
                          const struct pipe_clip_state *clip );

+/**
+ * Sets the rasterization state used by the draw module.
+ * The rast_handle is used to pass the driver specific representation
+ * of the rasterization state. It's going to be used when the
+ * draw module sets the state back on the driver itself using the
+ * pipe::bind_rasterizer_state method.
+ *
+ * NOTE: if you're calling this function from within the pipe's
+ * bind_rasterizer_state you should always call it before binding
+ * the actual state - that's because the draw module can try to
+ * bind its own rasterizer state which would reset your newly
+ * set state. i.e. always do
+ * draw_set_rasterizer_state(driver->draw, state->pipe_state, state);
+ * driver->state.raster = state;
+ */
 void draw_set_rasterizer_state( struct draw_context *draw,
                                const struct pipe_rasterizer_state *raster,
                                void *rast_handle );
--- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c
@@ -25,43 +25,43 @@
 *
 **************************************************************************/

-
-#include "util/u_debug.h"
+#include "pipe/p_config.h"
 #include "rtasm_cpu.h"

-
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-static boolean rtasm_sse_enabled(void)
+
+#include "util/u_debug.h"
+#include "util/u_cpu_detect.h"
+
+DEBUG_GET_ONCE_BOOL_OPTION(nosse, "GALLIUM_NOSSE", FALSE);
+
+static struct util_cpu_caps *get_cpu_caps(void)
 {
-   static boolean firsttime = 1;
-   static boolean enabled;
-   
-   /* This gets called quite often at the moment:
-    */
-   if (firsttime) {
-      enabled =  !debug_get_bool_option("GALLIUM_NOSSE", FALSE);
-      firsttime = FALSE;
-   }
-   return enabled;
+   util_cpu_detect();
+   return &util_cpu_caps;
 }
-#endif

 int rtasm_cpu_has_sse(void)
 {
-   /* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   return rtasm_sse_enabled();
-#else
-   return 0;
-#endif
+   return !debug_get_option_nosse() && get_cpu_caps()->has_sse;
 }

 int rtasm_cpu_has_sse2(void) 
 {
-   /* FIXME: actually detect this at run-time */
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   return rtasm_sse_enabled();
-#else
-   return 0;
-#endif
+   return !debug_get_option_nosse() && get_cpu_caps()->has_sse2;
 }
+
+
+#else
+
+int rtasm_cpu_has_sse(void)
+{
+   return 0;
+}
+
+int rtasm_cpu_has_sse2(void)
+{
+   return 0;
+}
+
+#endif
--- a/src/gallium/auxiliary/util/u_debug_memory.c
+++ b/src/gallium/auxiliary/util/u_debug_memory.c
@@ -38,6 +38,7 @@

 #include "os/os_memory.h"
 #include "os/os_memory_debug.h"
+#include "os/os_thread.h"

 #include "util/u_debug.h" 
 #include "util/u_debug_stack.h" 
@@ -72,6 +73,8 @@ struct debug_memory_footer

 static struct list_head list = { &list, &list };

+pipe_static_mutex(list_mutex);
+
 static unsigned long last_no = 0;


@@ -132,7 +135,9 @@ debug_malloc(const char *file, unsigned line, const char *function,
   ftr = footer_from_header(hdr);
   ftr->magic = DEBUG_MEMORY_MAGIC;
   
+   pipe_mutex_lock(list_mutex);
   LIST_ADDTAIL(&hdr->head, &list);
+   pipe_mutex_unlock(list_mutex);
   
   return data_from_header(hdr);
 }
@@ -164,7 +169,9 @@ debug_free(const char *file, unsigned line, const char *function,
      debug_assert(0);
   }

+   pipe_mutex_lock(list_mutex);
   LIST_DEL(&hdr->head);
+   pipe_mutex_unlock(list_mutex);
   hdr->magic = 0;
   ftr->magic = 0;
   
@@ -232,7 +239,9 @@ debug_realloc(const char *file, unsigned line, const char *function,
   new_ftr = footer_from_header(new_hdr);
   new_ftr->magic = DEBUG_MEMORY_MAGIC;
   
+   pipe_mutex_lock(list_mutex);
   LIST_REPLACE(&old_hdr->head, &new_hdr->head);
+   pipe_mutex_unlock(list_mutex);

   /* copy data */
   new_ptr = data_from_header(new_hdr);
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -1165,35 +1165,79 @@ int radeonTransformDeriv(struct radeon_compiler* c,
 }

 /**
+ * IF Temp[0].x -> IF Temp[0].x
+ * ...          -> ...
+ * KILP         -> KIL -abs(Temp[0].x)
+ * ...          -> ...
+ * ENDIF        -> ENDIF
+ *
+ * === OR ===
+ *
 * IF Temp[0].x -\
 * KILP         - > KIL -abs(Temp[0].x)
 * ENDIF        -/
 *
- * This needs to be done in its own pass, because it modifies the instructions
- * before and after KILP.
+ * === OR ===
+ *
+ * IF Temp[0].x -> IF Temp[0].x
+ * ...          -> ...
+ * ELSE         -> ELSE
+ * ...	        -> ...
+ * KILP	        -> KIL -abs(Temp[0].x)
+ * ...          -> ...
+ * ENDIF        -> ENDIF
+ *
+ * === OR ===
+ *
+ * KILP         -> KIL -none.1111
+ *
+ * This needs to be done in its own pass, because it might modify the
+ * instructions before and after KILP.
 */
 void rc_transform_KILP(struct radeon_compiler * c, void *user)
 {
 	struct rc_instruction * inst;
 	for (inst = c->Program.Instructions.Next;
 			inst != &c->Program.Instructions; inst = inst->Next) {
+		struct rc_instruction * if_inst;
+		unsigned in_if = 0;

 		if (inst->U.I.Opcode != RC_OPCODE_KILP)
 			continue;

+		for (if_inst = inst->Prev; if_inst != &c->Program.Instructions;
+						if_inst = if_inst->Prev) {
+
+			if (if_inst->U.I.Opcode == RC_OPCODE_IF) {
+				in_if = 1;
+				break;
+			}
+		}
+
 		inst->U.I.Opcode = RC_OPCODE_KIL;

-		if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
-				|| inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+		if (!in_if) {
 			inst->U.I.SrcReg[0] = negate(builtin_one);
 		} else {
-
+			/* This should work even if the KILP is inside the ELSE
+			 * block, because -0.0 is considered negative. */
 			inst->U.I.SrcReg[0] =
-				negate(absolute(inst->Prev->U.I.SrcReg[0]));
-			/* Remove IF */
-			rc_remove_instruction(inst->Prev);
-			/* Remove ENDIF */
-			rc_remove_instruction(inst->Next);
+				negate(absolute(if_inst->U.I.SrcReg[0]));
+
+			if (inst->Prev->U.I.Opcode == RC_OPCODE_IF
+				&& inst->Next->U.I.Opcode == RC_OPCODE_ENDIF) {
+
+				/* Optimize the special case (KILP is the whole IF body):
+				 * IF Temp[0].x
+				 * KILP
+				 * ENDIF
+				 */
+
+				/* Remove IF */
+				rc_remove_instruction(inst->Prev);
+				/* Remove ENDIF */
+				rc_remove_instruction(inst->Next);
+			}
 		}
 	}
 }
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -1046,6 +1046,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 							struct pipe_resource *texture,
 							const struct pipe_sampler_view *state)
 {
+	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_pipe_sampler_view *view = CALLOC_STRUCT(r600_pipe_sampler_view);
 	struct r600_pipe_resource_state *rstate;
 	struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
@@ -1092,6 +1093,11 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 		      util_format_get_blockwidth(state->format), 8);
 	array_mode = tmp->array_mode[0];
 	tile_type = tmp->tile_type;
+	/* 128 bit formats require tile type = 1 */
+	if (rctx->chip_class == CAYMAN) {
+		if (util_format_get_blocksize(state->format) >= 16)
+			tile_type = 1;
+	}

 	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
 	        height = 1;
@@ -1107,8 +1113,11 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte

 	rstate->val[0] = (S_030000_DIM(r600_tex_dim(texture->target)) |
 			  S_030000_PITCH((pitch / 8) - 1) |
-			  S_030000_NON_DISP_TILING_ORDER(tile_type) |
 			  S_030000_TEX_WIDTH(texture->width0 - 1));
+	if (rctx->chip_class == CAYMAN)
+		rstate->val[0] |= CM_S_030000_NON_DISP_TILING_ORDER(tile_type);
+	else
+		rstate->val[0] |= S_030000_NON_DISP_TILING_ORDER(tile_type);
 	rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) |
 			  S_030004_TEX_DEPTH(depth - 1) |
 			  S_030004_ARRAY_MODE(array_mode));
@@ -1457,6 +1466,11 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
 		tile_type = rtex->tile_type;
 	} else /* workaround for linear buffers */
 		tile_type = 1;
+	/* 128 bit formats require tile type = 1 */
+	if (rctx->chip_class == CAYMAN) {
+		if (util_format_get_blocksize(surf->base.format) >= 16)
+			tile_type = 1;
+	}

 	/* FIXME handle enabling of CB beyond BASE8 which has different offset */
 	r600_pipe_state_add_reg(rstate,
--- a/src/gallium/drivers/r600/evergreend.h
+++ b/src/gallium/drivers/r600/evergreend.h
@@ -970,6 +970,9 @@
 #define   S_030000_NON_DISP_TILING_ORDER(x)            (((x) & 0x1) << 5)
 #define   G_030000_NON_DISP_TILING_ORDER(x)            (((x) >> 5) & 0x1)
 #define   C_030000_NON_DISP_TILING_ORDER               0xFFFFFFDF
+#define   CM_S_030000_NON_DISP_TILING_ORDER(x)         (((x) & 0x3) << 4)
+#define   CM_G_030000_NON_DISP_TILING_ORDER(x)         (((x) >> 4) & 0x3)
+#define   CM_C_030000_NON_DISP_TILING_ORDER            0xFFFFFFCF
 #define   S_030000_PITCH(x)                            (((x) & 0xFFF) << 6)
 #define   G_030000_PITCH(x)                            (((x) >> 6) & 0xFFF)
 #define   C_030000_PITCH                               0xFFFC003F
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -47,6 +47,7 @@
 #include "r600_resource.h"
 #include "r600_shader.h"
 #include "r600_pipe.h"
+#include "r600_hw_context_priv.h"

 /*
 * pipe_context
@@ -116,6 +117,14 @@ static struct r600_fence *r600_create_fence(struct r600_pipe_context *ctx)

 	rscreen->fences.data[fence->index] = 0;
 	r600_context_emit_fence(&ctx->ctx, rscreen->fences.bo, fence->index, 1);
+
+	/* Create a dummy BO so that fence_finish without a timeout can sleep waiting for completion */
+	fence->sleep_bo = (struct r600_resource*)
+			pipe_buffer_create(&ctx->ctx.screen->screen, PIPE_BIND_CUSTOM,
+					   PIPE_USAGE_STAGING, 1);
+	/* Add the fence as a dummy relocation. */
+	r600_context_bo_reloc(&ctx->ctx, fence->sleep_bo, RADEON_USAGE_READWRITE);
+
 out:
 	pipe_mutex_unlock(rscreen->fences.mutex);
 	return fence;
@@ -568,6 +577,7 @@ static void r600_fence_reference(struct pipe_screen *pscreen,
 	if (pipe_reference(&(*oldf)->reference, &newf->reference)) {
 		struct r600_screen *rscreen = (struct r600_screen *)pscreen;
 		pipe_mutex_lock(rscreen->fences.mutex);
+		pipe_resource_reference((struct pipe_resource**)&(*oldf)->sleep_bo, NULL);
 		LIST_ADDTAIL(&(*oldf)->head, &rscreen->fences.pool);
 		pipe_mutex_unlock(rscreen->fences.mutex);
 	}
@@ -601,6 +611,17 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 	}

 	while (rscreen->fences.data[rfence->index] == 0) {
+		/* Special-case infinite timeout - wait for the dummy BO to become idle */
+		if (timeout == PIPE_TIMEOUT_INFINITE) {
+			rscreen->ws->buffer_wait(rfence->sleep_bo->buf, RADEON_USAGE_READWRITE);
+			break;
+		}
+
+		/* The dummy BO will be busy until the CS including the fence has completed, or
+		 * the GPU is reset. Don't bother continuing to spin when the BO is idle. */
+		if (!rscreen->ws->buffer_is_busy(rfence->sleep_bo->buf, RADEON_USAGE_READWRITE))
+			break;
+
 		if (++spins % 256)
 			continue;
 #ifdef PIPE_OS_UNIX
@@ -610,11 +631,11 @@ static boolean r600_fence_finish(struct pipe_screen *pscreen,
 #endif
 		if (timeout != PIPE_TIMEOUT_INFINITE &&
 		    os_time_get() - start_time >= timeout) {
-			return FALSE;
+			break;
 		}
 	}

-	return TRUE;
+	return rscreen->fences.data[rfence->index] != 0;
 }

 static int r600_interpret_tiling(struct r600_screen *rscreen, uint32_t tiling_config)
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -172,6 +172,7 @@ struct r600_textures_info {
 struct r600_fence {
 	struct pipe_reference		reference;
 	unsigned			index; /* in the shared bo */
+	struct r600_resource            *sleep_bo;
 	struct list_head		head;
 };

--- a/src/gallium/drivers/svga/svga_pipe_depthstencil.c
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -57,10 +57,10 @@ svga_translate_stencil_op(unsigned op)
   case PIPE_STENCIL_OP_KEEP:      return SVGA3D_STENCILOP_KEEP;
   case PIPE_STENCIL_OP_ZERO:      return SVGA3D_STENCILOP_ZERO;
   case PIPE_STENCIL_OP_REPLACE:   return SVGA3D_STENCILOP_REPLACE;
-   case PIPE_STENCIL_OP_INCR:      return SVGA3D_STENCILOP_INCR;
-   case PIPE_STENCIL_OP_DECR:      return SVGA3D_STENCILOP_DECR;
-   case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCRSAT; /* incorrect? */
-   case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECRSAT; /* incorrect? */
+   case PIPE_STENCIL_OP_INCR:      return SVGA3D_STENCILOP_INCRSAT;
+   case PIPE_STENCIL_OP_DECR:      return SVGA3D_STENCILOP_DECRSAT;
+   case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCR;
+   case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECR;
   case PIPE_STENCIL_OP_INVERT:    return SVGA3D_STENCILOP_INVERT;
   default:
      assert(0);
--- a/src/gallium/drivers/svga/svga_pipe_rasterizer.c
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -237,11 +237,11 @@ static void svga_bind_rasterizer_state( struct pipe_context *pipe,
   struct svga_context *svga = svga_context(pipe);
   struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;

-   svga->curr.rast = raster;

   draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL,
                             state);
-   
+   svga->curr.rast = raster;
+
   svga->dirty |= SVGA_NEW_RAST;
 }

--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -235,7 +235,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_MAX_TEMPS:
         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS, &result))
            return 32;
-         return result.u;
+         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
      case PIPE_SHADER_CAP_MAX_ADDRS:
      case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
 	 /* 
@@ -286,7 +286,7 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
      case PIPE_SHADER_CAP_MAX_TEMPS:
         if (!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS, &result))
            return 32;
-         return result.u;
+         return MIN2(result.u, SVGA3D_TEMPREG_MAX);
      case PIPE_SHADER_CAP_MAX_ADDRS:
         return 1;
      case PIPE_SHADER_CAP_MAX_PREDS:
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -136,7 +136,7 @@ update_need_pipeline( struct svga_context *svga,

   /* EDGEFLAGS
    */
-    if (vs->base.info.writes_edgeflag) {
+    if (vs && vs->base.info.writes_edgeflag) {
      SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__);
      need_pipeline = TRUE;
   }
@@ -145,7 +145,8 @@ update_need_pipeline( struct svga_context *svga,
    */
   if (svga->curr.reduced_prim == PIPE_PRIM_POINTS) {
      unsigned sprite_coord_gen = svga->curr.rast->templ.sprite_coord_enable;
-      unsigned generic_inputs = svga->curr.fs->generic_inputs;
+      unsigned generic_inputs =
+         svga->curr.fs ? svga->curr.fs->generic_inputs : 0;

      if (sprite_coord_gen &&
          (generic_inputs & ~sprite_coord_gen)) {
--- a/src/glx/apple/Makefile
+++ b/src/glx/apple/Makefile
@@ -35,6 +35,7 @@ SOURCES = \
 	apple_xgl_api_stereo.c \
 	apple_xgl_api_viewport.c \
 	appledri.c \
+	../create_context.c \
 	../clientattrib.c \
 	../compsize.c \
 	../glxconfig.c \
--- a/src/glx/create_context.c
+++ b/src/glx/create_context.c
@@ -80,8 +80,13 @@ glXCreateContextAttribsARB(Display *dpy, GLXFBConfig config,
 					       &dummy_err);
   }

-   if (gc == NULL)
+   if (gc == NULL) {
+#ifdef GLX_USE_APPLEGL
+      gc = applegl_create_context(psc, cfg, share, 0);
+#else
      gc = indirect_create_context(psc, cfg, share, 0);
+#endif
+   }

   gc->xid = xcb_generate_id(c);
   gc->share_xid = (share != NULL) ? share->xid : 0;
--- a/src/mapi/glapi/glapi_priv.h
+++ b/src/mapi/glapi/glapi_priv.h
@@ -38,6 +38,16 @@
 #include "GL/gl.h"
 #include "GL/glext.h"

+/* The define of GL_COVERAGE_SAMPLES_NV in gl2ext.h is guarded by a different
+ * extension (GL_NV_coverage_sample) than in glext.h
+ * (GL_NV_multisample_coverage).  Just undefine it to avoid spurious compiler
+ * warnings.
+ */
+#undef GL_COVERAGE_SAMPLES_NV
+
+#include "GLES2/gl2platform.h"
+#include "GLES2/gl2ext.h"
+
 #ifndef GL_OES_fixed_point
 typedef int GLfixed;
 typedef int GLclampx;
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -3243,7 +3243,7 @@ decompress_texture_image(struct gl_context *ctx,
                         struct gl_texture_image *texImage,
                         GLuint slice,
                         GLenum destFormat, GLenum destType,
-                         GLvoid *dest, GLint destRowLength)
+                         GLvoid *dest)
 {
   struct decompress_state *decompress = &ctx->Meta->Decompress;
   struct gl_texture_object *texObj = texImage->TexObject;
@@ -3273,7 +3273,7 @@ decompress_texture_image(struct gl_context *ctx,
   fboDrawSave = ctx->DrawBuffer->Name;
   fboReadSave = ctx->ReadBuffer->Name;

-   _mesa_meta_begin(ctx, MESA_META_ALL);
+   _mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_PIXEL_STORE);

   /* Create/bind FBO/renderbuffer */
   if (decompress->FBO == 0) {
@@ -3291,7 +3291,7 @@ decompress_texture_image(struct gl_context *ctx,
   }

   /* alloc dest surface */
-   if (width != decompress->Width || height != decompress->Height) {
+   if (width > decompress->Width || height > decompress->Height) {
      _mesa_RenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA,
                                   width, height);
      decompress->Width = width;
@@ -3408,7 +3408,6 @@ decompress_texture_image(struct gl_context *ctx,
         _mesa_PixelTransferf(GL_BLUE_SCALE, 0.0f);
      }

-      ctx->Pack.RowLength = destRowLength;
      _mesa_ReadPixels(0, 0, width, height, destFormat, destType, dest);
   }

@@ -3449,8 +3448,7 @@ _mesa_meta_GetTexImage(struct gl_context *ctx,
      const GLuint slice = 0; /* only 2D compressed textures for now */
      /* Need to unlock the texture here to prevent deadlock... */
      _mesa_unlock_texture(ctx, texObj);
-      decompress_texture_image(ctx, texImage, slice, format, type, pixels,
-                               ctx->Pack.RowLength);
+      decompress_texture_image(ctx, texImage, slice, format, type, pixels);
      /* ... and relock it */
      _mesa_lock_texture(ctx, texObj);
   }
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -1361,6 +1361,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
   }

+   /* Handle gl_PointSize builtin var here */
+   if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
+      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);
+
   if (inputsRead & FRAG_BIT_COL0) {
      intel->coloroffset = offset / 4;
      EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
--- a/src/mesa/drivers/dri/i915/i915_texstate.c
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -319,6 +319,28 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
           ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE))))
          return false;

+      /*
+       * According to 3DSTATE_MAP_STATE on page 104 of the Bspec,
+       * Vol3d 3D Instructions:
+       *   [DevGDG and DevAlv]: Must be a power of 2 for cube maps.
+       *   [DevLPT, DevCST and DevBLB]: If not a power of 2, cube maps
+       *      must have all faces enabled.
+       *
+       * However, testing on Pineview (DevBLB-derived) shows bad
+       * rendering: colors are sampled incorrectly in some fragments.
+       * The texture layout appears to be wrong: aligning the width and
+       * height to 4 (although this doesn't make much sense) fixes this
+       * issue but breaks others. The Bspec says nothing about layout
+       * or alignment for NPOT cube maps; presumably it simply assumes
+       * that cube maps are POT.
+       *
+       * Thus, presumably this check is needed on other platforms as
+       * well.
+       */
+      if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
+          !is_power_of_two(firstImage->Height))
+         return false;
+
      state[I915_TEXREG_SS3] = ss3;     /* SS3_NORMALIZED_COORDS */

      state[I915_TEXREG_SS3] |=
--- a/src/mesa/drivers/dri/i915/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -665,12 +665,11 @@ i915_set_draw_region(struct intel_context *intel,

   draw_offset = (draw_y << 16) | draw_x;

+   FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
+            (ctx->DrawBuffer->Width + draw_x > 2048) ||
+            (ctx->DrawBuffer->Height + draw_y > 2048));
   /* When changing drawing rectangle offset, an MI_FLUSH is first required. */
   if (draw_offset != i915->last_draw_offset) {
-      FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
-               (ctx->DrawBuffer->Width + draw_x > 2048) ||
-               (ctx->DrawBuffer->Height + draw_y > 2048));
-
      state->Buffer[I915_DESTREG_DRAWRECT0] = MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE;
      i915->last_draw_offset = draw_offset;
   } else
--- a/src/mesa/drivers/dri/i915/intel_tris.c
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -663,7 +663,7 @@ do {							\
   struct intel_context *intel = intel_context(ctx);			\
   GLuint color[n] = { 0, }, spec[n] = { 0, };				\
   GLuint coloroffset = intel->coloroffset;				\
-   bool specoffset = intel->specoffset;				\
+   GLuint specoffset = intel->specoffset;				\
   (void) color; (void) spec; (void) coloroffset; (void) specoffset;


--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -101,6 +101,7 @@ i965_C_SOURCES := \
 	gen7_cc_state.c \
 	gen7_clip_state.c \
 	gen7_disable.c \
+	gen7_hiz.c \
 	gen7_misc_state.c \
 	gen7_sampler_state.c \
 	gen7_sf_state.c \
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -41,8 +41,6 @@
 #include "brw_draw.h"
 #include "brw_state.h"

-#include "gen6_hiz.h"
-
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
 #include "intel_regions.h"
@@ -57,58 +55,6 @@
 * Mesa's Driver Functions
 ***************************************/

-/**
- * \brief Prepare for entry into glBegin/glEnd block.
- *
- * Resolve buffers before entering a glBegin/glEnd block. This is
- * necessary to prevent recursive calls to FLUSH_VERTICES.
- *
- * This resolves the depth buffer of each enabled depth texture and the HiZ
- * buffer of the attached depth renderbuffer.
- *
- * Details
- * -------
- * When vertices are queued during a glBegin/glEnd block, those vertices must
- * be drawn before any rendering state changes. To ensure this, Mesa calls
- * FLUSH_VERTICES as a prehook to such state changes. Therefore,
- * FLUSH_VERTICES itself cannot change rendering state without falling into a
- * recursive trap.
- *
- * This precludes meta-ops, namely buffer resolves, from occurring while any
- * vertices are queued. To prevent that situation, we resolve some buffers on
- * entering a glBegin/glEnd
- *
- * \see brwCleanupExecEnd()
- */
-static void brwPrepareExecBegin(struct gl_context *ctx)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_context *intel = &brw->intel;
-   struct intel_renderbuffer *draw_irb;
-   struct intel_texture_object *tex_obj;
-
-   if (!intel->has_hiz) {
-      /* The context uses no feature that requires buffer resolves. */
-      return;
-   }
-
-   /* Resolve each enabled texture. */
-   for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
-      if (!ctx->Texture.Unit[i]._ReallyEnabled)
-	 continue;
-      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
-      if (!tex_obj || !tex_obj->mt)
-	 continue;
-      intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
-   }
-
-   /* Resolve the attached depth buffer. */
-   draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
-   if (draw_irb) {
-      intel_renderbuffer_resolve_hiz(intel, draw_irb);
-   }
-}
-
 static void brwInitDriverFunctions(struct intel_screen *screen,
 				   struct dd_function_table *functions)
 {
@@ -117,7 +63,6 @@ static void brwInitDriverFunctions(struct intel_screen *screen,
   brwInitFragProgFuncs( functions );
   brw_init_queryobj_functions(functions);

-   functions->PrepareExecBegin = brwPrepareExecBegin;
   functions->BeginTransformFeedback = brw_begin_transform_feedback;

   if (screen->gen >= 7)
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -119,6 +119,10 @@
 #define BRW_MAX_CURBE                    (32*16)

 struct brw_context;
+struct brw_instruction;
+struct brw_vs_prog_key;
+struct brw_wm_prog_key;
+struct brw_wm_prog_data;

 enum brw_state_id {
   BRW_STATE_URB_FENCE,
@@ -144,7 +148,6 @@ enum brw_state_id {
   BRW_STATE_VS_CONSTBUF,
   BRW_STATE_PROGRAM_CACHE,
   BRW_STATE_STATE_BASE_ADDRESS,
-   BRW_STATE_HIZ,
   BRW_STATE_SOL_INDICES,
 };

@@ -174,7 +177,6 @@ enum brw_state_id {
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
 #define BRW_NEW_PROGRAM_CACHE		(1 << BRW_STATE_PROGRAM_CACHE)
 #define BRW_NEW_STATE_BASE_ADDRESS	(1 << BRW_STATE_STATE_BASE_ADDRESS)
-#define BRW_NEW_HIZ			(1 << BRW_STATE_HIZ)
 #define BRW_NEW_SOL_INDICES		(1 << BRW_STATE_SOL_INDICES)

 struct brw_state_flags {
@@ -288,6 +290,12 @@ typedef enum
   BRW_VERT_RESULT_NDC = VERT_RESULT_MAX,
   BRW_VERT_RESULT_HPOS_DUPLICATE,
   BRW_VERT_RESULT_PAD,
+   /*
+    * This is not actually a vert_result, just a marker telling the SF
+    * stage that it must do something special to handle the gl_PointCoord
+    * builtin variable correctly. See compile_sf_prog() for more info.
+    */
+   BRW_VERT_RESULT_PNTC,
   BRW_VERT_RESULT_MAX
 } brw_vert_result;

@@ -950,38 +958,18 @@ struct brw_context
   int state_batch_count;

   /**
-    * \brief State needed to execute HiZ meta-ops
+    * \brief State needed to execute HiZ ops.
    *
-    * All fields except \c op are initialized by gen6_hiz_init().
+    * \see gen6_hiz_init()
+    * \see gen6_hiz_exec()
    */
   struct brw_hiz_state {
-      /**
-       * \brief Indicates which HiZ operation is in progress.
+      /** \brief VBO for rectangle primitive.
       *
-       * See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
-       *   - 7.5.3.1 Depth Buffer Clear
-       *   - 7.5.3.2 Depth Buffer Resolve
-       *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+       * Rather than using glGenBuffers(), we allocate the VBO directly
+       * through drm.
       */
-      enum brw_hiz_op {
-	 BRW_HIZ_OP_NONE = 0,
-	 BRW_HIZ_OP_DEPTH_CLEAR,
-	 BRW_HIZ_OP_DEPTH_RESOLVE,
-	 BRW_HIZ_OP_HIZ_RESOLVE,
-      } op;
-
-      /** \brief Shader state */
-      struct {
-	 GLuint program;
-	 GLuint position_vbo;
-	 GLint position_location;
-      } shader;
-
-      /** \brief VAO for the rectangle primitive's vertices. */
-      GLuint vao;
-
-      GLuint fbo;
-      struct gl_renderbuffer *depth_rb;
+      drm_intel_bo *vertex_bo;
   } hiz;

   struct brw_sol_state {
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -126,12 +126,7 @@ static void gen6_set_prim(struct brw_context *brw,

   DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));

-   if (brw->hiz.op) {
-      assert(prim->mode == GL_TRIANGLES);
-      hw_prim = _3DPRIM_RECTLIST;
-   } else {
-      hw_prim = prim_to_hw_prim[prim->mode];
-   }
+   hw_prim = prim_to_hw_prim[prim->mode];

   if (hw_prim != brw->primitive) {
      brw->primitive = hw_prim;
@@ -307,17 +302,11 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
   struct intel_context *intel = &brw->intel;
   struct intel_renderbuffer *depth_irb;
   struct intel_texture_object *tex_obj;
-   bool did_resolve = false;
-
-   /* Avoid recursive HiZ op. */
-   if (brw->hiz.op) {
-      return;
-   }

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb && depth_irb->mt) {
-      did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb);
+      intel_renderbuffer_resolve_hiz(intel, depth_irb);
   }

   /* Resolve depth buffer of each enabled depth texture. */
@@ -327,33 +316,7 @@ brw_predraw_resolve_buffers(struct brw_context *brw)
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
 	 continue;
-      did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
-   }
-
-   if (did_resolve) {
-      /* Call vbo_bind_array() to synchronize the vbo module's vertex
-       * attributes to the gl_context's.
-       *
-       * Details
-       * -------
-       * The vbo module tracks vertex attributes separately from the
-       * gl_context.  Specifically, the vbo module maintins vertex attributes
-       * in vbo_exec_context::array::inputs, which is synchronized with
-       * gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_array().
-       * vbo_draw_arrays() calls vbo_bind_array() to perform the
-       * synchronization before calling the real draw call,
-       * vbo_context::draw_arrays.
-       *
-       * At this point (after performing a resolve meta-op but before calling
-       * vbo_bind_array), the gl_context's vertex attributes have been
-       * restored to their original state (that is, their state before the
-       * meta-op began), but the vbo module's vertex attribute are those used
-       * in the last meta-op. Therefore we must manually synchronize the two with
-       * vbo_bind_array() before continuing with the original draw command.
-       */
-      _mesa_update_state(ctx);
-      vbo_bind_arrays(ctx);
-      _mesa_update_state(ctx);
+      intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
   }
 }

@@ -372,9 +335,7 @@ static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
   struct intel_renderbuffer *depth_irb =
 	 intel_get_renderbuffer(fb, BUFFER_DEPTH);

-   if (depth_irb &&
-       ctx->Depth.Mask &&
-       !brw->hiz.op) {
+   if (depth_irb && ctx->Depth.Mask) {
      intel_renderbuffer_set_needs_depth_resolve(depth_irb);
   }
 }
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2188,7 +2188,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 			    msg_type,
 			    msg_length,
 			    header_present,
-			    1, /* last render target write */
+			    eot, /* last render target write */
 			    response_length,
 			    eot,
 			    0 /* send_commit_msg */);
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -710,6 +710,15 @@ fs_visitor::calculate_urb_setup()
 	       urb_setup[fp_index] = urb_next++;
 	 }
      }
+
+      /*
+       * It is an FS-only attribute, and its interpolation is done in the
+       * SF thread, so count it here, too.
+       *
+       * See compile_sf_prog() for more info.
+       */
+      if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(FRAG_ATTRIB_PNTC))
+         urb_setup[FRAG_ATTRIB_PNTC] = urb_next++;
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
@@ -1844,6 +1853,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
      prog->LinkStatus = false;
      ralloc_strcat(&prog->InfoLog, v.fail_msg);

+      _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
+		    v.fail_msg);
+
      return false;
   }

--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -286,6 +286,18 @@ public:
 	      offset == inst->offset);
   }

+   int regs_written()
+   {
+      if (is_tex())
+	 return 4;
+
+      /* The SINCOS and INT_DIV_QUOTIENT_AND_REMAINDER math functions return 2,
+       * but we don't currently use them...nor do we have an opcode for them.
+       */
+
+      return 1;
+   }
+
   bool is_tex()
   {
      return (opcode == SHADER_OPCODE_TEX ||
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -236,8 +236,6 @@ fs_visitor::assign_regs()

      if (reg == -1) {
 	 fail("no register to spill\n");
-      } else if (intel->gen >= 7) {
-	 fail("no spilling support on gen7 yet\n");
      } else if (c->dispatch_width == 16) {
 	 fail("no spilling support on 16-wide yet\n");
      } else {
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -564,6 +564,12 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
       !src.equals(&last_rhs_inst->dst))
      return false;

+   /* If last_rhs_inst wrote a different number of components than our LHS,
+    * we can't safely rewrite it.
+    */
+   if (ir->lhs->type->vector_elements != last_rhs_inst->regs_written())
+      return false;
+
   /* Success!  Rewrite the instruction. */
   last_rhs_inst->dst = dst;

--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -774,8 +774,7 @@ static void upload_state_base_address( struct brw_context *brw )
 	* If this isn't programmed to a real bound, the sampler border color
 	* pointer is rejected, causing border color to mysteriously fail.
 	*/
-       OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
-		 intel->batch.bo->size | 1);
+       OUT_BATCH(0xfffff001);
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
@@ -788,7 +787,7 @@ static void upload_state_base_address( struct brw_context *brw )
       OUT_BATCH(1); /* Indirect object base address */
       OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
 		 1); /* Instruction base address */
-       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(0xfffff001); /* General state upper bound */
       OUT_BATCH(1); /* Indirect object upper bound */
       OUT_BATCH(1); /* Instruction access upper bound */
       ADVANCE_BATCH();
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -64,6 +64,16 @@ static void compile_sf_prog( struct brw_context *brw,

   c.key = *key;
   brw_compute_vue_map(&c.vue_map, intel, c.key.userclip_active, c.key.attrs);
+   if (c.key.do_point_coord) {
+      /*
+       * gl_PointCoord is an FS builtin variable rather than a VS one, so
+       * it's not included in the c.vue_map generated for the VS stage.
+       * Add it manually here so that the SF shader generates the
+       * interpolation coefficients the FS shader needs.
+       */
+      c.vue_map.vert_result_to_slot[BRW_VERT_RESULT_PNTC] = c.vue_map.num_slots;
+      c.vue_map.slot_to_vert_result[c.vue_map.num_slots++] = BRW_VERT_RESULT_PNTC;
+   }
   c.urb_entry_read_offset = brw_sf_compute_urb_entry_read_offset(intel);
   c.nr_attr_regs = (c.vue_map.num_slots + 1)/2 - c.urb_entry_read_offset;
   c.nr_setup_regs = c.nr_attr_regs;
@@ -125,6 +135,8 @@ brw_upload_sf_prog(struct brw_context *brw)
 {
   struct gl_context *ctx = &brw->intel.ctx;
   struct brw_sf_prog_key key;
+   /* _NEW_BUFFERS */
+   bool render_to_fbo = ctx->DrawBuffer->Name != 0;

   memset(&key, 0, sizeof(key));

@@ -167,7 +179,15 @@ brw_upload_sf_prog(struct brw_context *brw)
 	    key.point_sprite_coord_replace |= (1 << i);
      }
   }
-   key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
+   if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(FRAG_ATTRIB_PNTC))
+      key.do_point_coord = 1;
+   /*
+    * Window coordinates in a FBO are inverted, which means point
+    * sprite origin must be inverted, too.
+    */
+   if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
+      key.sprite_origin_lower_left = true;
+
   /* _NEW_LIGHT */
   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
   key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
@@ -176,10 +196,9 @@ brw_upload_sf_prog(struct brw_context *brw)
   if (key.do_twoside_color) {
      /* If we're rendering to a FBO, we have to invert the polygon
       * face orientation, just as we invert the viewport in
-       * sf_unit_create_from_key().  ctx->DrawBuffer->Name will be
-       * nonzero if we're rendering to such an FBO.
+       * sf_unit_create_from_key().
       */
-      key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+      key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) != render_to_fbo;
   }

   if (!brw_search_cache(&brw->cache, BRW_SF_PROG,
@@ -192,7 +211,8 @@ brw_upload_sf_prog(struct brw_context *brw)

 const struct brw_tracked_state brw_sf_prog = {
   .dirty = {
-      .mesa  = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT | _NEW_TRANSFORM),
+      .mesa  = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT |
+                _NEW_TRANSFORM | _NEW_BUFFERS),
      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
      .cache = CACHE_NEW_VS_PROG
   },
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -52,6 +52,7 @@ struct brw_sf_prog_key {
   GLuint do_flat_shading:1;
   GLuint frontface_ccw:1;
   GLuint do_point_sprite:1;
+   GLuint do_point_coord:1;
   GLuint sprite_origin_lower_left:1;
   GLuint userclip_active:1;
 };
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -386,6 +386,8 @@ calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
      if (c->key.point_sprite_coord_replace & (1 << (vert_result1 - VERT_RESULT_TEX0)))
 	 pc |= 0x0f;
   }
+   if (vert_result1 == BRW_VERT_RESULT_PNTC)
+      pc |= 0x0f;

   vert_result2 = vert_reg_to_vert_result(c, reg, 1);
   if (vert_result2 >= VERT_RESULT_TEX0 && vert_result2 <= VERT_RESULT_TEX7) {
@@ -393,6 +395,8 @@ calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
                                                     VERT_RESULT_TEX0)))
         pc |= 0xf0;
   }
+   if (vert_result2 == BRW_VERT_RESULT_PNTC)
+      pc |= 0xf0;

   return pc;
 }
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -372,7 +372,6 @@ static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
-   DEFINE_BIT(BRW_NEW_HIZ),
   {0, 0, 0}
 };

--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -50,6 +50,7 @@
 #include "brw_wm.h"

 #include "gen6_hiz.h"
+#include "gen7_hiz.h"

 #include "glsl/ralloc.h"

@@ -70,9 +71,11 @@ static void brw_destroy_context( struct intel_context *intel )

   brw_destroy_state(brw);
   brw_draw_destroy( brw );
+
   ralloc_free(brw->wm.compile_data);

   dri_bo_release(&brw->curbe.curbe_bo);
+   dri_bo_release(&brw->hiz.vertex_bo);
   dri_bo_release(&brw->vs.const_bo);
   dri_bo_release(&brw->wm.const_bo);

@@ -236,8 +239,15 @@ void brwInitVtbl( struct brw_context *brw )
   brw->intel.vtbl.is_hiz_depth_format = brw_is_hiz_depth_format;

   if (brw->intel.has_hiz) {
-      brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice;
-      brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice;
+      if (brw->intel.gen == 7) {
+         brw->intel.vtbl.resolve_depth_slice = gen7_resolve_depth_slice;
+         brw->intel.vtbl.resolve_hiz_slice = gen7_resolve_hiz_slice;
+      } else if (brw->intel.gen == 6) {
+         brw->intel.vtbl.resolve_depth_slice = gen6_resolve_depth_slice;
+         brw->intel.vtbl.resolve_hiz_slice = gen6_resolve_hiz_slice;
+      } else {
+         assert(0);
+      }
   }

   if (brw->intel.gen >= 7) {
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -67,23 +67,6 @@ upload_clip_state(struct brw_context *brw)
         GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
   }

-   if (brw->hiz.op) {
-      /* HiZ operations emit a rectangle primitive, which requires clipping to
-       * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
-       * Section 1.3 3D Primitives Overview:
-       *    RECTLIST:
-       *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
-       *    Mode should be set to a value other than CLIPMODE_NORMAL.
-       */
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-      return;
-   }
-
   if (!ctx->Transform.DepthClamp)
      depth_clamp = GEN6_CLIP_Z_TEST;

@@ -124,8 +107,7 @@ const struct brw_tracked_state gen6_clip_state = {
   .dirty = {
      .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
      .brw   = (BRW_NEW_CONTEXT |
-                BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_HIZ),
+                BRW_NEW_FRAGMENT_PROGRAM),
      .cache = 0
   },
   .emit = upload_clip_state,
--- a/src/mesa/drivers/dri/i965/gen6_depthstencil.c
+++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
@@ -82,11 +82,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
   }

   /* _NEW_DEPTH */
-   if ((ctx->Depth.Test || brw->hiz.op) && depth_irb) {
-      assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_RESOLVE || ctx->Depth.Test);
-      assert(brw->hiz.op != BRW_HIZ_OP_HIZ_RESOLVE   || !ctx->Depth.Test);
-      assert(brw->hiz.op != BRW_HIZ_OP_DEPTH_CLEAR   || !ctx->Depth.Test);
-
+   if (ctx->Depth.Test && depth_irb) {
      ds->ds2.depth_test_enable = ctx->Depth.Test;
      ds->ds2.depth_test_func = intel_translate_compare_func(ctx->Depth.Func);
      ds->ds2.depth_write_enable = ctx->Depth.Mask;
@@ -98,8 +94,7 @@ gen6_upload_depth_stencil_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_depth_stencil_state = {
   .dirty = {
      .mesa = _NEW_DEPTH | _NEW_STENCIL | _NEW_BUFFERS,
-      .brw  = (BRW_NEW_BATCH |
-	       BRW_NEW_HIZ),
+      .brw  = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = gen6_upload_depth_stencil_state,
--- a/src/mesa/drivers/dri/i965/gen6_hiz.c
+++ b/src/mesa/drivers/dri/i965/gen6_hiz.c
@@ -21,345 +21,620 @@
 * IN THE SOFTWARE.
 */

-#include "gen6_hiz.h"
-
 #include <assert.h>

-#include "mesa/drivers/common/meta.h"
-
-#include "mesa/main/arrayobj.h"
-#include "mesa/main/bufferobj.h"
-#include "mesa/main/depth.h"
-#include "mesa/main/enable.h"
-#include "mesa/main/fbobject.h"
-#include "mesa/main/framebuffer.h"
-#include "mesa/main/get.h"
-#include "mesa/main/renderbuffer.h"
-#include "mesa/main/shaderapi.h"
-#include "mesa/main/varray.h"
-
+#include "intel_batchbuffer.h"
 #include "intel_fbo.h"
 #include "intel_mipmap_tree.h"
-#include "intel_regions.h"
-#include "intel_tex.h"

 #include "brw_context.h"
 #include "brw_defines.h"
+#include "brw_state.h"

-static const uint32_t gen6_hiz_meta_save =
-
-      /* Disable alpha, depth, and stencil test.
-       *
-       * See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
-       *   - 7.5.3.1 Depth Buffer Clear
-       *   - 7.5.3.2 Depth Buffer Resolve
-       *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
-       */
-      MESA_META_ALPHA_TEST |
-      MESA_META_DEPTH_TEST |
-      MESA_META_STENCIL_TEST |
-
-      /* Disable viewport mapping.
-       *
-       * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3
-       * 3D Primitives Overview:
-       *    RECTLIST:
-       *    Viewport Mapping must be DISABLED (as is typical with the use of
-       *    screen- space coordinates).
-       *
-       * We must also manually disable 3DSTATE_SF.Viewport_Transform_Enable.
-       */
-      MESA_META_VIEWPORT |
-
-      /* Disable clipping.
-       *
-       * From page 11 of the Sandy Bridge PRM, Volume 2, Part 1, Section 1.3
-       * 3D Primitives Overview:
-       *     Either the CLIP unit should be DISABLED, or the CLIP unit’s Clip
-       *     Mode should be set to a value other than CLIPMODE_NORMAL.
-       */
-      MESA_META_CLIP |
-
-      /* Render a solid rectangle (set 3DSTATE_SF.FrontFace_Fill_Mode).
-       *
-       * From page 249 of the Sandy Bridge PRM, Volume 2, Part 1, Section
-       * 6.4.1.1 3DSTATE_SF, FrontFace_Fill_Mode:
-       *     SOLID: Any triangle or rectangle object found to be front-facing
-       *     is rendered as a solid object. This setting is required when
-       *     (rendering rectangle (RECTLIST) objects.
-       * Also see field BackFace_Fill_Mode.
-       *
-       * Note: MESA_META_RASTERIZAION also disables culling, but that is
-       * irrelevant. See 3DSTATE_SF.Cull_Mode.
-       */
-      MESA_META_RASTERIZATION |
-
-      /* Each HiZ operation uses a vertex shader and VAO. */
-      MESA_META_SHADER |
-      MESA_META_VERTEX |
-
-      /* Disable scissoring.
-       *
-       * Scissoring is disabled for resolves because a resolve operation
-       * should resolve the entire buffer. Scissoring is disabled for depth
-       * clears because, if we are performing a partial depth clear, then we
-       * specify the clear region with the RECTLIST vertices.
-       */
-      MESA_META_SCISSOR |
-
-      MESA_META_SELECT_FEEDBACK;
-
-static void
-gen6_hiz_get_framebuffer_enum(struct gl_context *ctx,
-                              GLenum *bind_enum,
-                              GLenum *get_enum)
-{
-   if (ctx->Extensions.EXT_framebuffer_blit && ctx->API == API_OPENGL) {
-      /* Different buffers may be bound to GL_DRAW_FRAMEBUFFER and
-       * GL_READ_FRAMEBUFFER. Take care to not disrupt the read buffer.
-       */
-      *bind_enum = GL_DRAW_FRAMEBUFFER;
-      *get_enum = GL_DRAW_FRAMEBUFFER_BINDING;
-   } else {
-      /* The enums GL_DRAW_FRAMEBUFFER and GL_READ_FRAMEBUFFER do not exist.
-       * The bound framebuffer is both the read and draw buffer.
-       */
-      *bind_enum = GL_FRAMEBUFFER;
-      *get_enum = GL_FRAMEBUFFER_BINDING;
-   }
-}
+#include "gen6_hiz.h"

 /**
- * Initialize static data needed for HiZ operations.
+ * \name Constants for HiZ VBO
+ * \{
+ *
+ * \see brw_context::hiz::vertex_bo
 */
-static void
+#define GEN6_HIZ_NUM_VERTICES 3
+#define GEN6_HIZ_NUM_VUE_ELEMS 8
+#define GEN6_HIZ_VBO_SIZE (GEN6_HIZ_NUM_VERTICES \
+                           * GEN6_HIZ_NUM_VUE_ELEMS \
+                           * sizeof(float))
+/** \} */
+
+/**
+ * \brief Initialize data needed for the HiZ op.
+ *
+ * This is called when executing the first HiZ op.
+ * \see brw_context::hiz
+ */
+void
 gen6_hiz_init(struct brw_context *brw)
 {
   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
   struct brw_hiz_state *hiz = &brw->hiz;
-   GLenum fb_bind_enum, fb_get_enum;

-   if (hiz->fbo != 0)
-      return;
+   hiz->vertex_bo = drm_intel_bo_alloc(intel->bufmgr, "bufferobj",
+                                       GEN6_HIZ_VBO_SIZE, /* size */
+                                       64); /* alignment */

-   gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum);
+   if (!hiz->vertex_bo)
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "failed to allocate internal VBO");
+}

-   /* Create depthbuffer.
+void
+gen6_hiz_emit_batch_head(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+
+   /* To ensure that the batch contains only the resolve, flush the batch
+    * before beginning and after finishing emitting the resolve packets.
    *
-    * Until glRenderbufferStorage is called, the renderbuffer hash table
-    * maps the renderbuffer name to a dummy renderbuffer. We need the
-    * renderbuffer to be registered in the hash table so that framebuffer
-    * validation succeeds, so we hackishly allocate storage then immediately
-    * discard it.
+    * Ideally, we would not need to flush for the resolve op. But, I suspect
+    * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in
+    * a single batch, and there is no safe way to ensure that other than by
+    * fencing the resolve with flushes. Ideally, we would just detect if
+    * a batch is in progress and do the right thing, but that would require
+    * the ability to *safely* access brw_context::state::dirty::brw
+    * outside of the brw_upload_state() codepath.
    */
-   GLuint depth_rb_name;
-   _mesa_GenRenderbuffersEXT(1, &depth_rb_name);
-   _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, depth_rb_name);
-   _mesa_RenderbufferStorageEXT(GL_RENDERBUFFER, GL_DEPTH_COMPONENT, 32, 32);
-   _mesa_reference_renderbuffer(&hiz->depth_rb,
-                                _mesa_lookup_renderbuffer(ctx, depth_rb_name));
-   intel_miptree_release(&((struct intel_renderbuffer*) hiz->depth_rb)->mt);
+   intel_flush(ctx);

-   /* Setup FBO. */
-   _mesa_GenFramebuffersEXT(1, &hiz->fbo);
-   _mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo);
-   _mesa_FramebufferRenderbufferEXT(fb_bind_enum,
-                                    GL_DEPTH_ATTACHMENT,
-                                    GL_RENDERBUFFER,
-                                    hiz->depth_rb->Name);
+   /* CMD_PIPELINE_SELECT
+    *
+    * Select the 3D pipeline, as opposed to the media pipeline.
+    */
+   {
+      BEGIN_BATCH(1);
+      OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16);
+      ADVANCE_BATCH();
+   }

-   /* Compile vertex shader. */
-   const char *vs_source =
-      "attribute vec4 position;\n"
-      "void main()\n"
-      "{\n"
-      "   gl_Position = position;\n"
-      "}\n";
-   GLuint vs = _mesa_CreateShaderObjectARB(GL_VERTEX_SHADER);
-   _mesa_ShaderSourceARB(vs, 1, &vs_source, NULL);
-   _mesa_CompileShaderARB(vs);
+   /* 3DSTATE_MULTISAMPLE */
+   {
+      int length = intel->gen == 7 ? 4 : 3;

-   /* Compile fragment shader. */
-   const char *fs_source = "void main() {}";
-   GLuint fs = _mesa_CreateShaderObjectARB(GL_FRAGMENT_SHADER);
-   _mesa_ShaderSourceARB(fs, 1, &fs_source, NULL);
-   _mesa_CompileShaderARB(fs);
+      BEGIN_BATCH(length);
+      OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (length - 2));
+      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+                MS_NUMSAMPLES_1);
+      OUT_BATCH(0);
+      if (length >= 4)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();

-   /* Link and use program. */
-   hiz->shader.program = _mesa_CreateProgramObjectARB();
-   _mesa_AttachShader(hiz->shader.program, vs);
-   _mesa_AttachShader(hiz->shader.program, fs);
-   _mesa_LinkProgramARB(hiz->shader.program);
-   _mesa_UseProgramObjectARB(hiz->shader.program);
+   }

-   /* Create and bind VAO. */
-   _mesa_GenVertexArrays(1, &hiz->vao);
-   _mesa_BindVertexArray(hiz->vao);
+   /* 3DSTATE_SAMPLE_MASK */
+   {
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2));
+      OUT_BATCH(1);
+      ADVANCE_BATCH();
+   }

-   /* Setup VBO for 'position'. */
-   hiz->shader.position_location =
-      _mesa_GetAttribLocationARB(hiz->shader.program, "position");
-   _mesa_GenBuffersARB(1, &hiz->shader.position_vbo);
-   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, hiz->shader.position_vbo);
-   _mesa_VertexAttribPointerARB(hiz->shader.position_location,
-				2, /*components*/
-				GL_FLOAT,
-				GL_FALSE, /*normalized?*/
-				0, /*stride*/
-				NULL);
-   _mesa_EnableVertexAttribArrayARB(hiz->shader.position_location);
-
-   /* Cleanup. */
-   _mesa_DeleteShader(vs);
-   _mesa_DeleteShader(fs);
+   /* CMD_STATE_BASE_ADDRESS
+    *
+    * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
+    *     The following commands must be reissued following any change to the
+    *     base addresses:
+    *         3DSTATE_CC_POINTERS
+    *         3DSTATE_BINDING_TABLE_POINTERS
+    *         3DSTATE_SAMPLER_STATE_POINTERS
+    *         3DSTATE_VIEWPORT_STATE_POINTERS
+    *         MEDIA_STATE_POINTERS
+    */
+   {
+      BEGIN_BATCH(10);
+      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+      OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
+      /* SurfaceStateBaseAddress */
+      OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+      /* DynamicStateBaseAddress */
+      OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+                                  I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
+      OUT_BATCH(1); /* IndirectObjectBaseAddress */
+      OUT_BATCH(1); /* InstructionBaseAddress */
+      OUT_BATCH(1); /* GeneralStateUpperBound */
+      OUT_BATCH(1); /* DynamicStateUpperBound */
+      OUT_BATCH(1); /* IndirectObjectUpperBound*/
+      OUT_BATCH(1); /* InstructionAccessUpperBound */
+      ADVANCE_BATCH();
+   }
 }

-/**
- * Wrap \c brw->hiz.depth_rb around a miptree.
- *
- * \see gen6_hiz_teardown_depth_buffer()
- */
-static void
-gen6_hiz_setup_depth_buffer(struct brw_context *brw,
-			    struct intel_mipmap_tree *mt,
-			    unsigned int level,
-			    unsigned int layer)
+void
+gen6_hiz_emit_vertices(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt,
+                       unsigned int level,
+                       unsigned int layer)
 {
-   struct gl_renderbuffer *rb = brw->hiz.depth_rb;
-   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-
-   rb->Format = mt->format;
-   rb->_BaseFormat = _mesa_get_format_base_format(rb->Format);
-   rb->InternalFormat = rb->_BaseFormat;
-   rb->Width = mt->level[level].width;
-   rb->Height = mt->level[level].height;
-
-   irb->mt_level = level;
-   irb->mt_layer = layer;
-
-   intel_miptree_reference(&irb->mt, mt);
-   intel_renderbuffer_set_draw_offset(irb);
-}
-
-/**
- * Release the region from \c brw->hiz.depth_rb.
- *
- * \see gen6_hiz_setup_depth_buffer()
- */
-static void
-gen6_hiz_teardown_depth_buffer(struct gl_renderbuffer *rb)
-{
-   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-   intel_miptree_release(&irb->mt);
-}
-
-static void
-gen6_resolve_slice(struct intel_context *intel,
-	         struct intel_mipmap_tree *mt,
-		 unsigned int level,
-		 unsigned int layer,
-                 enum brw_hiz_op op)
-{
-   struct gl_context *ctx = &intel->ctx;
-   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = &brw->intel;
   struct brw_hiz_state *hiz = &brw->hiz;
-   GLenum fb_bind_enum, fb_get_enum;

-   /* Do not recurse. */
-   assert(!brw->hiz.op);
-
-   assert(mt->hiz_mt != NULL);
-   assert(level >= mt->first_level);
-   assert(level <= mt->last_level);
-   assert(layer < mt->level[level].depth);
-
-   gen6_hiz_get_framebuffer_enum(ctx, &fb_bind_enum, &fb_get_enum);
-
-   /* Save state. */
-   GLint save_drawbuffer;
-   GLint save_renderbuffer;
-   _mesa_meta_begin(ctx, gen6_hiz_meta_save);
-   _mesa_GetIntegerv(fb_get_enum, &save_drawbuffer);
-   _mesa_GetIntegerv(GL_RENDERBUFFER_BINDING, &save_renderbuffer);
-
-   /* Initialize context data for HiZ operations. */
-   gen6_hiz_init(brw);
-
-   /* Set depth state. */
-   if (!ctx->Depth.Mask) {
-      /* This sets 3DSTATE_WM.Depth_Buffer_Write_Enable. */
-      _mesa_DepthMask(GL_TRUE);
-   }
-   if (op == BRW_HIZ_OP_DEPTH_RESOLVE) {
-      _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
-      _mesa_DepthFunc(GL_NEVER);
-   }
-
-   /* Setup FBO. */
-   gen6_hiz_setup_depth_buffer(brw, mt, level, layer);
-   _mesa_BindFramebufferEXT(fb_bind_enum, hiz->fbo);
-
-
-   /* A rectangle primitive (3DPRIM_RECTLIST) consists of only three vertices.
-    * The vertices reside in screen space with DirectX coordinates (this is,
-    * (0, 0) is the upper left corner).
+   /* Setup VBO for the rectangle primitive.
+    *
+    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
+    * vertices. The vertices reside in screen space with DirectX coordinates
+    * (that is, (0, 0) is the upper left corner).
    *
    *   v2 ------ implied
    *    |        |
    *    |        |
    *   v0 ----- v1
-    */
-   const int width = hiz->depth_rb->Width;
-   const int height = hiz->depth_rb->Height;
-   const GLfloat positions[] = {
-          0, height,
-      width, height,
-          0,      0,
-   };
-
-   /* Setup program and vertex attributes. */
-   _mesa_UseProgramObjectARB(hiz->shader.program);
-   _mesa_BindVertexArray(hiz->vao);
-   _mesa_BindBufferARB(GL_ARRAY_BUFFER, hiz->shader.position_vbo);
-   _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(positions), positions,
-		       GL_DYNAMIC_DRAW_ARB);
-
-   /* Execute the HiZ operation. */
-   brw->hiz.op = op;
-   brw->state.dirty.brw |= BRW_NEW_HIZ;
-   _mesa_DrawArrays(GL_TRIANGLES, 0, 3);
-   brw->state.dirty.brw |= BRW_NEW_HIZ;
-   brw->hiz.op = BRW_HIZ_OP_NONE;
-
-   /* Restore state.
    *
-    * The order in which state is restored is significant. The draw buffer
-    * used for the HiZ op has no stencil buffer, and glStencilFunc() clamps
-    * the stencil reference value to the range allowed by the draw buffer's
-    * number of stencil bits. So, the draw buffer binding must be restored
-    * before the stencil state, or else the stencil ref will be clamped to 0.
+    * Since the VS is disabled, the clipper loads each VUE directly from
+    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
+    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
+    *   dw0: Reserved, MBZ.
+    *   dw1: Render Target Array Index. The HiZ op does not use indexed
+    *        vertices, so set the dword to 0.
+    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
+    *        scissoring, so set the dword to 0.
+    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
+    *        set the dword to 0.
+    *   dw4: Vertex Position X.
+    *   dw5: Vertex Position Y.
+    *   dw6: Vertex Position Z.
+    *   dw7: Vertex Position W.
+    *
+    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
+    * "Vertex URB Entry (VUE) Formats".
    */
-   gen6_hiz_teardown_depth_buffer(hiz->depth_rb);
-   _mesa_BindRenderbufferEXT(GL_RENDERBUFFER, save_renderbuffer);
-   _mesa_BindFramebufferEXT(fb_bind_enum, save_drawbuffer);
-   _mesa_meta_end(ctx);
+   {
+      const int width = mt->level[level].width;
+      const int height = mt->level[level].height;
+
+      const float vertices[GEN6_HIZ_VBO_SIZE] = {
+         /* v0 */ 0, 0, 0, 0,         0, height, 0, 1,
+         /* v1 */ 0, 0, 0, 0,     width, height, 0, 1,
+         /* v2 */ 0, 0, 0, 0,         0,      0, 0, 1,
+      };
+
+      drm_intel_bo_subdata(hiz->vertex_bo, 0, GEN6_HIZ_VBO_SIZE, vertices);
+   }
+
+   /* 3DSTATE_VERTEX_BUFFERS */
+   {
+      const int num_buffers = 1;
+      const int batch_length = 1 + 4 * num_buffers;
+
+      uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+                     (GEN6_HIZ_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
+
+      if (intel->gen >= 7)
+         dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
+      OUT_BATCH(dw0);
+      /* start address */
+      OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
+      /* end address */
+      OUT_RELOC(hiz->vertex_bo, I915_GEM_DOMAIN_VERTEX,
+                0, hiz->vertex_bo->size - 1);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VERTEX_ELEMENTS
+    *
+    * Fetch dwords 0 - 7 from each VUE. See the comments above where
+    * hiz->vertex_bo is filled with data.
+    */
+   {
+      const int num_elements = 2;
+      const int batch_length = 1 + 2 * num_elements;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
+      /* Element 0 */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                0 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      /* Element 1 */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                16 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+      ADVANCE_BATCH();
+   }
 }

+/**
+ * \brief Execute a HiZ op on a miptree slice.
+ *
+ * To execute the HiZ op, this function manually constructs and emits a batch
+ * to "draw" the HiZ op's rectangle primitive. The batchbuffer is flushed
+ * before constructing and after emitting the batch.
+ *
+ * This function alters no GL state.
+ *
+ * For an overview of HiZ ops, see the following sections of the Sandy Bridge
+ * PRM, Volume 1, Part 2:
+ *   - 7.5.3.1 Depth Buffer Clear
+ *   - 7.5.3.2 Depth Buffer Resolve
+ *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ */
+static void
+gen6_hiz_exec(struct intel_context *intel,
+              struct intel_mipmap_tree *mt,
+              unsigned int level,
+              unsigned int layer,
+              enum gen6_hiz_op op)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_hiz_state *hiz = &brw->hiz;
+
+   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
+   assert(mt->hiz_mt != NULL);
+   intel_miptree_check_level_layer(mt, level, layer);
+
+   if (hiz->vertex_bo == NULL)
+      gen6_hiz_init(brw);
+
+   if (hiz->vertex_bo == NULL) {
+      /* Ouch. Give up. */
+      return;
+   }
+
+   gen6_hiz_emit_batch_head(brw);
+   gen6_hiz_emit_vertices(brw, mt, level, layer);
+
+   /* 3DSTATE_URB
+    *
+    * Assign the entire URB to the VS. Even though the VS is disabled, URB space
+    * is still needed because the clipper loads the VUEs from the URB. From
+    * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_URB,
+    * Dword 1.15:0 "VS Number of URB Entries":
+    *     This field is always used (even if VS Function Enable is DISABLED).
+    *
+    * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
+    * safely ignore it because this batch contains only one draw call.
+    *     Because of URB corruption caused by allocating a previous GS unit
+    *     URB entry to the VS unit, software is required to send a “GS NULL
+    *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
+    *     plus a dummy DRAW call before any case where VS will be taking over
+    *     GS URB space.
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
+      OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CC_STATE_POINTERS
+    *
+    * The pointer offsets are relative to
+    * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+    *
+    * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
+    */
+   {
+      uint32_t depthstencil_offset;
+      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
+
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
+      OUT_BATCH(1); /* BLEND_STATE offset */
+      OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
+      OUT_BATCH(1); /* COLOR_CALC_STATE offset */
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VS
+    *
+    * Disable vertex shader.
+    */
+   {
+      /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
+       * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+       *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
+       *   command that causes the VS Function Enable to toggle. Pipeline
+       *   flush can be executed by sending a PIPE_CONTROL command with CS
+       *   stall bit set and a post sync operation.
+       */
+      intel_emit_post_sync_nonzero_flush(intel);
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_GS
+    *
+    * Disable the geometry shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLIP
+    *
+    * Disable the clipper.
+    *
+    * The HiZ op emits a rectangle primitive, which requires clipping to
+    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+    * Section 1.3 "3D Primitives Overview":
+    *    RECTLIST:
+    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+    *    Mode should be set to a value other than CLIPMODE_NORMAL.
+    *
+    * Also disable perspective divide. This doesn't change the clipper's
+    * output, but does spare a few electrons.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SF
+    *
+    * Disable ViewportTransformEnable (dw2.1)
+    *
+    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen- space coordinates).
+    *
+    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
+    * and BackFaceFillMode (dw2.5:6) to SOLID(0).
+    *
+    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
+    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
+    *     SOLID: Any triangle or rectangle object found to be front-facing
+    *     is rendered as a solid object. This setting is required when
+    *     rendering rectangle (RECTLIST) objects.
+    */
+   {
+      BEGIN_BATCH(20);
+      OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
+      OUT_BATCH((1 - 1) << GEN6_SF_NUM_OUTPUTS_SHIFT | /* only position */
+                1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+                0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
+      for (int i = 0; i < 18; ++i)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_WM
+    *
+    * Disable thread dispatch (dw5.19) and enable the HiZ op.
+    *
+    * Even though thread dispatch is disabled, max threads (dw5.25:31) must be
+    * nonzero to prevent the GPU from hanging. See the valid ranges in the
+    * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31
+    * "Maximum Number Of Threads".
+    */
+   {
+      uint32_t dw4 = 0;
+
+      switch (op) {
+      case GEN6_HIZ_OP_DEPTH_CLEAR:
+         assert(!"not implemented");
+         dw4 |= GEN6_WM_DEPTH_CLEAR;
+         break;
+      case GEN6_HIZ_OP_DEPTH_RESOLVE:
+         dw4 |= GEN6_WM_DEPTH_RESOLVE;
+         break;
+      case GEN6_HIZ_OP_HIZ_RESOLVE:
+         dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+
+      BEGIN_BATCH(9);
+      OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(dw4);
+      OUT_BATCH((brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
+      OUT_BATCH((1 - 1) << GEN6_WM_NUM_SF_OUTPUTS_SHIFT); /* only position */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_BUFFER */
+   {
+      uint32_t width = mt->level[level].width;
+      uint32_t height = mt->level[level].height;
+
+      uint32_t tile_x;
+      uint32_t tile_y;
+      uint32_t offset;
+      {
+         /* Construct a dummy renderbuffer just to extract tile offsets. */
+         struct intel_renderbuffer rb;
+         rb.mt = mt;
+         rb.mt_level = level;
+         rb.mt_layer = layer;
+         intel_renderbuffer_set_draw_offset(&rb);
+         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
+      }
+
+      uint32_t format;
+      switch (mt->format) {
+      case MESA_FORMAT_Z16:       format = BRW_DEPTHFORMAT_D16_UNORM; break;
+      case MESA_FORMAT_Z32_FLOAT: format = BRW_DEPTHFORMAT_D32_FLOAT; break;
+      case MESA_FORMAT_X8_Z24:    format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
+      default:                    assert(0); break;
+      }
+
+      intel_emit_post_sync_nonzero_flush(intel);
+      intel_emit_depth_stall_flushes(intel);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
+                format << 18 |
+                1 << 21 | /* separate stencil enable */
+                1 << 22 | /* hiz enable */
+                BRW_TILEWALK_YMAJOR << 26 |
+                1 << 27 | /* y-tiled */
+                BRW_SURFACE_2D << 29);
+      OUT_RELOC(mt->region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                offset);
+      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
+                (width + tile_x - 1) << 6 |
+                (height + tile_y - 1) << 19);
+      OUT_BATCH(0);
+      OUT_BATCH(tile_x |
+                tile_y << 16);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HIER_DEPTH_BUFFER */
+   {
+      struct intel_region *hiz_region = mt->hiz_mt->region;
+
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+      OUT_RELOC(hiz_region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STENCIL_BUFFER */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLEAR_PARAMS
+    *
+    * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
+    *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
+    *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
+    */
+   {
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DRAWING_RECTANGLE */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
+                ((mt->level[level].height - 1) << 16));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DPRIMITIVE */
+   {
+     BEGIN_BATCH(6);
+     OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+               _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+               GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
+     OUT_BATCH(3); /* vertex count per instance */
+     OUT_BATCH(0);
+     OUT_BATCH(1); /* instance count */
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+     ADVANCE_BATCH();
+   }
+
+   /* See comments above at first invocation of intel_flush() in
+    * gen6_hiz_emit_batch_head().
+    */
+   intel_flush(ctx);
+
+   /* Be safe. */
+   brw->state.dirty.brw = ~0;
+   brw->state.dirty.cache = ~0;
+}
+
+/**
+ * \param out_offset is relative to
+ *        CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+ */
+void
+gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
+                                  enum gen6_hiz_op op,
+                                  uint32_t *out_offset)
+{
+   struct gen6_depth_stencil_state *state;
+   state = brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
+                              sizeof(*state), 64,
+                              out_offset);
+   memset(state, 0, sizeof(*state));
+
+   /* See the following sections of the Sandy Bridge PRM, Volume 1, Part 2:
+    *   - 7.5.3.1 Depth Buffer Clear
+    *   - 7.5.3.2 Depth Buffer Resolve
+    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+    */
+   state->ds2.depth_write_enable = 1;
+   if (op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
+      state->ds2.depth_test_enable = 1;
+      state->ds2.depth_test_func = COMPAREFUNC_NEVER;
+   }
+}
+
+/** \see intel_context::vtbl::resolve_hiz_slice */
 void
 gen6_resolve_hiz_slice(struct intel_context *intel,
                       struct intel_mipmap_tree *mt,
                       uint32_t level,
                       uint32_t layer)
 {
-   gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_HIZ_RESOLVE);
+   gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
 }

-
+/** \see intel_context::vtbl::resolve_depth_slice */
 void
 gen6_resolve_depth_slice(struct intel_context *intel,
                         struct intel_mipmap_tree *mt,
                         uint32_t level,
                         uint32_t layer)
 {
-   gen6_resolve_slice(intel, mt, level, layer, BRW_HIZ_OP_DEPTH_RESOLVE);
+   gen6_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
 }
--- a/src/mesa/drivers/dri/i965/gen6_hiz.h
+++ b/src/mesa/drivers/dri/i965/gen6_hiz.h
@@ -28,6 +28,44 @@
 struct intel_context;
 struct intel_mipmap_tree;

+/**
+ * For an overview of the HiZ operations, see the following sections of the
+ * Sandy Bridge PRM, Volume 1, Part 2:
+ *   - 7.5.3.1 Depth Buffer Clear
+ *   - 7.5.3.2 Depth Buffer Resolve
+ *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ */
+enum gen6_hiz_op {
+   GEN6_HIZ_OP_DEPTH_CLEAR,
+   GEN6_HIZ_OP_DEPTH_RESOLVE,
+   GEN6_HIZ_OP_HIZ_RESOLVE,
+};
+
+/**
+ * \name HiZ internals
+ * \{
+ *
+ * Used internally by gen6_hiz_exec() and gen7_hiz_exec().
+ */
+
+void
+gen6_hiz_init(struct brw_context *brw);
+
+void
+gen6_hiz_emit_batch_head(struct brw_context *brw);
+
+void
+gen6_hiz_emit_vertices(struct brw_context *brw,
+                       struct intel_mipmap_tree *mt,
+                       unsigned int level,
+                       unsigned int layer);
+
+void
+gen6_hiz_emit_depth_stencil_state(struct brw_context *brw,
+                                  enum gen6_hiz_op op,
+                                  uint32_t *out_offset);
+/** \} */
+
 void
 gen6_resolve_hiz_slice(struct intel_context *intel,
                       struct intel_mipmap_tree *mt,
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -149,17 +149,8 @@ upload_sf_state(struct brw_context *brw)
      urb_entry_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
      urb_entry_read_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;

-   dw2 = GEN6_SF_STATISTICS_ENABLE;
-
-   /* Enable viewport transform only if no HiZ operation is progress
-    *
-    * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
-    * Primitives Overview":
-    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
-    *     use of screen- space coordinates).
-    */
-   if (!brw->hiz.op)
-      dw2 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+   dw2 = GEN6_SF_STATISTICS_ENABLE |
+         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   dw3 = 0;
   dw4 = 0;
@@ -354,8 +345,7 @@ const struct brw_tracked_state gen6_sf_state = {
 		_NEW_POINT |
 		_NEW_TRANSFORM),
      .brw   = (BRW_NEW_CONTEXT |
-		BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_HIZ),
+		BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_VS_PROG
   },
   .emit = upload_sf_state,
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -133,6 +133,15 @@ upload_vs_state(struct brw_context *brw)
   struct intel_context *intel = &brw->intel;
   uint32_t floating_point_mode = 0;

+   /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
+    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+    *   [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS
+    *   command that causes the VS Function Enable to toggle. Pipeline
+    *   flush can be executed by sending a PIPE_CONTROL command with CS
+    *   stall bit set and a post sync operation.
+    */
+   intel_emit_post_sync_nonzero_flush(intel);
+
   if (brw->vs.push_const_size == 0) {
      /* Disable the push constant buffers. */
      BEGIN_BATCH(5);
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -149,23 +149,6 @@ upload_wm_state(struct brw_context *brw)
   dw4 |= (brw->wm.prog_data->first_curbe_grf_16 <<
 	   GEN6_WM_DISPATCH_START_GRF_SHIFT_2);

-   switch (brw->hiz.op) {
-   case BRW_HIZ_OP_NONE:
-      break;
-   case BRW_HIZ_OP_DEPTH_CLEAR:
-      dw4 |= GEN6_WM_DEPTH_CLEAR;
-      break;
-   case BRW_HIZ_OP_DEPTH_RESOLVE:
-      dw4 |= GEN6_WM_DEPTH_RESOLVE;
-      break;
-   case BRW_HIZ_OP_HIZ_RESOLVE:
-      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
-      break;
-   default:
-      assert(0);
-      break;
-   }
-
   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;

   /* CACHE_NEW_WM_PROG */
@@ -233,8 +216,7 @@ const struct brw_tracked_state gen6_wm_state = {
 		_NEW_PROGRAM_CONSTANTS |
 		_NEW_POLYGON),
      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_BATCH |
-		BRW_NEW_HIZ),
+		BRW_NEW_BATCH),
      .cache = (CACHE_NEW_SAMPLER |
 		CACHE_NEW_WM_PROG)
   },
--- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
@@ -39,23 +39,6 @@ upload_clip_state(struct brw_context *brw)
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct gl_fragment_program *fprog = brw->fragment_program;

-   if (brw->hiz.op) {
-      /* HiZ operations emit a rectangle primitive, which requires clipping to
-       * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
-       * Section 1.3 3D Primitives Overview:
-       *    RECTLIST:
-       *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
-       *    Mode should be set to a value other than CLIPMODE_NORMAL.
-       */
-      BEGIN_BATCH(4);
-      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      OUT_BATCH(0);
-      ADVANCE_BATCH();
-      return;
-   }
-
   /* _NEW_BUFFERS */
   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;

@@ -133,8 +116,7 @@ const struct brw_tracked_state gen7_clip_state = {
                _NEW_LIGHT |
                _NEW_TRANSFORM),
      .brw   = (BRW_NEW_CONTEXT |
-                BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_HIZ),
+                BRW_NEW_FRAGMENT_PROGRAM),
      .cache = 0
   },
   .emit = upload_clip_state,
--- a/src/mesa/drivers/dri/i965/gen7_hiz.c
+++ b/src/mesa/drivers/dri/i965/gen7_hiz.c
@@ -0,0 +1,464 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "gen6_hiz.h"
+#include "gen7_hiz.h"
+
+/** Run a HiZ op on one miptree slice (gen7); no-op if setup fails.
+ * \copydoc gen6_hiz_exec()
+ */
+static void
+gen7_hiz_exec(struct intel_context *intel,
+              struct intel_mipmap_tree *mt,
+              unsigned int level,
+              unsigned int layer,
+              enum gen6_hiz_op op)
+{
+   struct gl_context *ctx = &intel->ctx;
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_hiz_state *hiz = &brw->hiz;
+
+   assert(op != GEN6_HIZ_OP_DEPTH_CLEAR); /* Not implemented yet. */
+   assert(mt->hiz_mt != NULL);
+   intel_miptree_check_level_layer(mt, level, layer);
+
+   if (hiz->vertex_bo == NULL)
+      gen6_hiz_init(brw);
+
+   if (hiz->vertex_bo == NULL) {
+      /* Ouch. Give up. */
+      return;
+   }
+
+   uint32_t depth_format;
+   switch (mt->format) {
+   case MESA_FORMAT_Z16:       depth_format = BRW_DEPTHFORMAT_D16_UNORM; break;
+   case MESA_FORMAT_Z32_FLOAT: depth_format = BRW_DEPTHFORMAT_D32_FLOAT; break;
+   case MESA_FORMAT_X8_Z24:    depth_format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; break;
+   default:                    assert(0); return; /* unsupported format */
+   }
+
+   gen6_hiz_emit_batch_head(brw);
+   gen6_hiz_emit_vertices(brw, mt, level, layer);
+
+   /* 3DSTATE_URB_VS
+    * 3DSTATE_URB_HS
+    * 3DSTATE_URB_DS
+    * 3DSTATE_URB_GS
+    *
+    * If the 3DSTATE_URB_VS is emitted, then the others must be also. From the
+    * BSpec, Volume 2a "3D Pipeline Overview", Section 1.7.1 3DSTATE_URB_VS:
+    *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
+    *     programmed in order for the programming of this state to be
+    *     valid.
+    */
+   {
+      /* The minimum valid value is 32. See 3DSTATE_URB_VS,
+       * Dword 1.15:0 "VS Number of URB Entries".
+       */
+      int num_vs_entries = 32;
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
+      OUT_BATCH(1 << GEN7_URB_ENTRY_SIZE_SHIFT |
+                0 << GEN7_URB_STARTING_ADDRESS_SHIFT |
+                num_vs_entries);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_GS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_HS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_URB_DS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
+    *
+    * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+    */
+   {
+      uint32_t depthstencil_offset;
+      gen6_hiz_emit_depth_stencil_state(brw, op, &depthstencil_offset);
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2));
+      OUT_BATCH(depthstencil_offset | 1);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_VS
+    *
+    * Disable vertex shader.
+    */
+   {
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HS
+    *
+    * Disable the hull shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_TE
+    *
+    * Disable the tessellation engine.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DS
+    *
+    * Disable the domain shader.
+    */
+   {
+      BEGIN_BATCH(6);
+      OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_GS
+    *
+    * Disable the geometry shader.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STREAMOUT
+    *
+    * Disable streamout.
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLIP
+    *
+    * Disable the clipper.
+    *
+    * The HiZ op emits a rectangle primitive, which requires clipping to
+    * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+    * Section 1.3 "3D Primitives Overview":
+    *    RECTLIST:
+    *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+    *    Mode should be set to a value other than CLIPMODE_NORMAL.
+    *
+    * Also disable perspective divide. This doesn't change the clipper's
+    * output, but does spare a few electrons.
+    */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SF
+    *
+    * Disable ViewportTransformEnable (dw1.1)
+    *
+    * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+    * Primitives Overview":
+    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+    *     use of screen- space coordinates).
+    *
+    * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
+    * and BackFaceFillMode (dw1.4:3) to SOLID(0).
+    *
+    * From the Sandy Bridge PRM, Volume 2, Part 1, Section
+    * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
+    *     SOLID: Any triangle or rectangle object found to be front-facing
+    *     is rendered as a solid object. This setting is required when
+    *     (rendering rectangle (RECTLIST) objects.
+    */
+   {
+      BEGIN_BATCH(7);
+      OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2));
+      OUT_BATCH(depth_format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_SBE */
+   {
+      BEGIN_BATCH(14);
+      OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2));
+      OUT_BATCH((1 - 1) << GEN7_SBE_NUM_OUTPUTS_SHIFT | /* only position */
+                1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
+                0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
+      for (int i = 0; i < 12; ++i)
+         OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_WM
+    *
+    * Disable PS thread dispatch (dw1.29) and enable the HiZ op.
+    */
+   {
+      uint32_t dw1 = 0;
+
+      switch (op) {
+      case GEN6_HIZ_OP_DEPTH_CLEAR:
+         assert(!"not implemented");
+         dw1 |= GEN7_WM_DEPTH_CLEAR;
+         break;
+      case GEN6_HIZ_OP_DEPTH_RESOLVE:
+         dw1 |= GEN7_WM_DEPTH_RESOLVE;
+         break;
+      case GEN6_HIZ_OP_HIZ_RESOLVE:
+         dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2));
+      OUT_BATCH(dw1);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_PS
+    *
+    * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
+    * that, thread dispatch info must still be specified.
+    *     - Maximum Number of Threads (dw4.24:31) must be nonzero, as the BSpec
+    *       states that the valid range for this field is [0x3, 0x2f].
+    *     - A dispatch mode must be given; that is, at least one of the
+    *       "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
+    *       discovered through simulator error messages.
+    */
+   {
+      BEGIN_BATCH(8);
+      OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(((brw->max_wm_threads - 1) << GEN7_PS_MAX_THREADS_SHIFT) |
+		GEN7_PS_32_DISPATCH_ENABLE);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DEPTH_BUFFER */
+   {
+      uint32_t width = mt->level[level].width;
+      uint32_t height = mt->level[level].height;
+
+      uint32_t tile_x;
+      uint32_t tile_y;
+      uint32_t offset;
+      {
+         /* Construct a dummy renderbuffer just to extract tile offsets. */
+         struct intel_renderbuffer rb;
+         rb.mt = mt;
+         rb.mt_level = level;
+         rb.mt_layer = layer;
+         intel_renderbuffer_set_draw_offset(&rb);
+         offset = intel_renderbuffer_tile_offsets(&rb, &tile_x, &tile_y);
+      }
+
+      intel_emit_depth_stall_flushes(intel);
+
+      BEGIN_BATCH(7);
+      OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+      OUT_BATCH(((mt->region->pitch * mt->region->cpp) - 1) |
+                depth_format << 18 |
+                1 << 22 | /* hiz enable */
+                1 << 28 | /* depth write */
+                BRW_SURFACE_2D << 29);
+      OUT_RELOC(mt->region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                offset);
+      OUT_BATCH((width + tile_x - 1) << 4 |
+                (height + tile_y - 1) << 18);
+      OUT_BATCH(0);
+      OUT_BATCH(tile_x |
+                tile_y << 16);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HIER_DEPTH_BUFFER */
+   {
+      struct intel_region *hiz_region = mt->hiz_mt->region;
+
+      BEGIN_BATCH(3);
+      OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+      OUT_RELOC(hiz_region->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STENCIL_BUFFER */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_CLEAR_PARAMS
+    *
+    * From the BSpec, Volume 2a.11 Windower, Section 1.5.6.3.2
+    * 3DSTATE_CLEAR_PARAMS:
+    *    [DevIVB] 3DSTATE_CLEAR_PARAMS must always be programmed in the along
+    *    with the other Depth/Stencil state commands(i.e.  3DSTATE_DEPTH_BUFFER,
+    *    3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
+    */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_DRAWING_RECTANGLE */
+   {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(((mt->level[level].width - 1) & 0xffff) |
+                ((mt->level[level].height - 1) << 16));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DPRIMITIVE */
+   {
+     BEGIN_BATCH(7);
+     OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+     OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
+               _3DPRIM_RECTLIST);
+     OUT_BATCH(3); /* vertex count per instance */
+     OUT_BATCH(0);
+     OUT_BATCH(1); /* instance count */
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+     ADVANCE_BATCH();
+   }
+
+   /* See comments above at first invocation of intel_flush() in
+    * gen6_hiz_emit_batch_head().
+    */
+   intel_flush(ctx);
+
+   /* Be safe. */
+   brw->state.dirty.brw = ~0;
+   brw->state.dirty.cache = ~0;
+}
+
+/** Gen7: HiZ-resolve one miptree slice. \copydoc gen6_resolve_hiz_slice() */
+void
+gen7_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer)
+{
+   gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_HIZ_RESOLVE);
+}
+
+/** Gen7: depth-resolve one miptree slice. \copydoc gen6_resolve_depth_slice() */
+void
+gen7_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer)
+{
+   gen7_hiz_exec(intel, mt, level, layer, GEN6_HIZ_OP_DEPTH_RESOLVE);
+}
--- a/src/mesa/drivers/dri/i965/gen7_hiz.h
+++ b/src/mesa/drivers/dri/i965/gen7_hiz.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+struct intel_context;
+struct intel_mipmap_tree;
+
+/** \copydoc gen6_resolve_hiz_slice() */
+void
+gen7_resolve_hiz_slice(struct intel_context *intel,
+                       struct intel_mipmap_tree *mt,
+                       uint32_t level,
+                       uint32_t layer);
+
+/** \copydoc gen6_resolve_depth_slice() */
+void
+gen7_resolve_depth_slice(struct intel_context *intel,
+                         struct intel_mipmap_tree *mt,
+                         uint32_t level,
+                         uint32_t layer);
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -149,8 +149,7 @@ const struct brw_tracked_state gen7_sbe_state = {
 		_NEW_PROGRAM |
 		_NEW_TRANSFORM),
      .brw   = (BRW_NEW_CONTEXT |
-		BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_HIZ),
+		BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_VS_PROG
   },
   .emit = upload_sbe_state,
@@ -166,17 +165,8 @@ upload_sf_state(struct brw_context *brw)
   /* _NEW_BUFFERS */
   bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;

-   dw1 = GEN6_SF_STATISTICS_ENABLE;
-
-   /* Enable viewport transform only if no HiZ operation is progress
-    *
-    * From page 11 of the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
-    * Primitives Overview":
-    *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
-    *     use of screen- space coordinates).
-    */
-   if (!brw->hiz.op)
-      dw1 |= GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
+   dw1 = GEN6_SF_STATISTICS_ENABLE |
+         GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;

   /* _NEW_BUFFERS */
   dw1 |= (brw_depthbuffer_format(brw) << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT);
@@ -310,8 +300,7 @@ const struct brw_tracked_state gen7_sf_state = {
 		_NEW_SCISSOR |
 		_NEW_BUFFERS |
 		_NEW_POINT),
-      .brw   = (BRW_NEW_CONTEXT |
-		BRW_NEW_HIZ),
+      .brw   = BRW_NEW_CONTEXT,
      .cache = CACHE_NEW_VS_PROG
   },
   .emit = upload_sf_state,
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -99,6 +99,8 @@ gen7_upload_urb(struct brw_context *brw)
   /* GS requirement */
   assert(!brw->gs.prog_active);

+   gen7_emit_vs_workaround_flush(intel);
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_URB_VS << 16 | (2 - 2));
   OUT_BATCH(brw->urb.nr_vs_entries |
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -35,6 +35,8 @@ upload_vs_state(struct brw_context *brw)
   struct intel_context *intel = &brw->intel;
   uint32_t floating_point_mode = 0;

+   gen7_emit_vs_workaround_flush(intel);
+
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
   OUT_BATCH(brw->bind.bo_offset);
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -49,23 +49,6 @@ upload_wm_state(struct brw_context *brw)
   dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0;
   dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;

-   switch (brw->hiz.op) {
-   case BRW_HIZ_OP_NONE:
-      break;
-   case BRW_HIZ_OP_DEPTH_CLEAR:
-      dw1 |= GEN7_WM_DEPTH_CLEAR;
-      break;
-   case BRW_HIZ_OP_DEPTH_RESOLVE:
-      dw1 |= GEN7_WM_DEPTH_RESOLVE;
-      break;
-   case BRW_HIZ_OP_HIZ_RESOLVE:
-      dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
-      break;
-   default:
-      assert(0);
-      break;
-   }
-
   /* _NEW_LINE */
   if (ctx->Line.StippleFlag)
      dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
@@ -106,7 +89,6 @@ const struct brw_tracked_state gen7_wm_state = {
      .mesa  = (_NEW_LINE | _NEW_LIGHT | _NEW_POLYGON |
 	        _NEW_COLOR | _NEW_BUFFERS),
      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_HIZ |
 		BRW_NEW_BATCH),
      .cache = 0,
   },
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -57,13 +57,13 @@ intel_batchbuffer_init(struct intel_context *intel)
 {
   intel_batchbuffer_reset(intel);

-   if (intel->gen == 6) {
+   if (intel->gen >= 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
-						      "gen6 workaround",
+						      "pipe_control workaround",
 						      4096, 4096);
   }
 }
@@ -363,6 +363,28 @@ intel_emit_depth_stall_flushes(struct intel_context *intel)
   ADVANCE_BATCH();
 }

+/**
+ * Emit the gen7 VS-state workaround.  BSpec, volume 2a.03: VS Stage Input /
+ * State: "[DevIVB] A PIPE_CONTROL with Post-Sync Operation set to 1h and a
+ * depth stall needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
+ * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
+ * 3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL needs to
+ * be sent before any combination of VS associated 3DSTATE."
+ */
+void
+gen7_emit_vs_workaround_flush(struct intel_context *intel)
+{
+   assert(intel->gen == 7); /* the rule quoted above is tagged [DevIVB] */
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE);
+   OUT_RELOC(intel->batch.workaround_bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+}
+
 /**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
--- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -43,6 +43,7 @@ bool intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
 void intel_batchbuffer_emit_mi_flush(struct intel_context *intel);
 void intel_emit_post_sync_nonzero_flush(struct intel_context *intel);
 void intel_emit_depth_stall_flushes(struct intel_context *intel);
+void gen7_emit_vs_workaround_flush(struct intel_context *intel);

 static INLINE uint32_t float_as_int(float f)
 {
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -628,6 +628,8 @@ intelInitContext(struct intel_context *intel,
   intel->has_separate_stencil = intel->intelScreen->hw_has_separate_stencil;
   intel->must_use_separate_stencil = intel->intelScreen->hw_must_use_separate_stencil;
   intel->has_hiz = intel->intelScreen->hw_has_hiz;
+   intel->has_llc = intel->intelScreen->hw_has_llc;
+   intel->has_swizzling = intel->intelScreen->hw_has_swizzling;

   memset(&ctx->TextureFormatSupported,
 	  0, sizeof(ctx->TextureFormatSupported));
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -213,6 +213,8 @@ struct intel_context
   bool has_separate_stencil;
   bool must_use_separate_stencil;
   bool has_hiz;
+   bool has_llc;
+   bool has_swizzling;

   int urb_size;

--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -830,7 +830,8 @@ intel_miptree_map_s8(struct intel_context *intel,
 	 for (uint32_t x = 0; x < map->w; x++) {
 	    ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
 	                                       x + image_x + map->x,
-	                                       y + image_y + map->y);
+	                                       y + image_y + map->y,
+					       intel->has_swizzling);
 	    untiled_s8_map[y * map->w + x] = tiled_s8_map[offset];
 	 }
      }
@@ -865,7 +866,8 @@ intel_miptree_unmap_s8(struct intel_context *intel,
 	 for (uint32_t x = 0; x < map->w; x++) {
 	    ptrdiff_t offset = intel_offset_S8(mt->region->pitch,
 	                                       x + map->x,
-	                                       y + map->y);
+	                                       y + map->y,
+					       intel->has_swizzling);
 	    tiled_s8_map[offset] = untiled_s8_map[y * map->w + x];
 	 }
      }
@@ -925,7 +927,8 @@ intel_miptree_map_depthstencil(struct intel_context *intel,
 	    int map_x = map->x + x, map_y = map->y + y;
 	    ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
 						 map_x + s_image_x,
-						 map_y + s_image_y);
+						 map_y + s_image_y,
+						 intel->has_swizzling);
 	    ptrdiff_t z_offset = ((map_y + z_image_y) * z_mt->region->pitch +
 				  (map_x + z_image_x));
 	    uint8_t s = s_map[s_offset];
@@ -983,7 +986,8 @@ intel_miptree_unmap_depthstencil(struct intel_context *intel,
 	 for (uint32_t x = 0; x < map->w; x++) {
 	    ptrdiff_t s_offset = intel_offset_S8(s_mt->region->pitch,
 						 x + s_image_x + map->x,
-						 y + s_image_y + map->y);
+						 y + s_image_y + map->y,
+						 intel->has_swizzling);
 	    ptrdiff_t z_offset = ((y + z_image_y) * z_mt->region->pitch +
 				  (x + z_image_x));

@@ -1052,7 +1056,7 @@ intel_miptree_map(struct intel_context *intel,
      intel_miptree_map_s8(intel, mt, map, level, slice);
   } else if (mt->stencil_mt) {
      intel_miptree_map_depthstencil(intel, mt, map, level, slice);
-   } else if (intel->gen >= 6 &&
+   } else if (intel->has_llc &&
 	      !(mode & GL_MAP_WRITE_BIT) &&
 	      !mt->compressed &&
 	      mt->region->tiling == I915_TILING_X) {
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -650,6 +650,30 @@ intel_override_separate_stencil(struct intel_screen *screen)
   }
 }

+/**
+ * Probe for bit-6 swizzling: allocate a scratch X-tiled BO, ask libdrm which
+ * swizzle mode it was given, and report whether that mode is anything other
+ * than I915_BIT_6_SWIZZLE_NONE.  Reports false if the BO cannot be allocated.
+ */
+static bool
+intel_detect_swizzling(struct intel_screen *screen)
+{
+   uint32_t tiling = I915_TILING_X;
+   uint32_t swizzle_mode = 0;
+   unsigned long aligned_pitch;
+   unsigned long flags = 0;
+   drm_intel_bo *probe;
+   probe = drm_intel_bo_alloc_tiled(screen->bufmgr, "swizzle test",
+				    64, 64, 4,
+				    &tiling, &aligned_pitch, flags);
+   if (probe == NULL)
+      return false;
+
+   /* The BO exists only to be queried; drop it right after the query. */
+   drm_intel_bo_get_tiling(probe, &tiling, &swizzle_mode);
+   drm_intel_bo_unreference(probe);
+   return swizzle_mode != I915_BIT_6_SWIZZLE_NONE;
+}
+
 /**
 * This is the driver specific part of the createNewScreen entry point.
 * Called when using DRI2.
@@ -723,6 +747,14 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
   intelScreen->hw_has_hiz = intelScreen->gen >= 6;
   intelScreen->dri2_has_hiz = INTEL_DRI2_HAS_HIZ_UNKNOWN;

+#if defined(I915_PARAM_HAS_LLC)
+   intelScreen->hw_has_llc =
+      intel_get_boolean(intelScreen->driScrnPriv,
+              I915_PARAM_HAS_LLC);
+#else
+   intelScreen->hw_has_llc = intelScreen->gen >= 6;
+#endif
+
   intel_override_hiz(intelScreen);
   intel_override_separate_stencil(intelScreen);

@@ -740,6 +772,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
   if (!intel_init_bufmgr(intelScreen))
       return false;

+   intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
+
   psp->extensions = intelScreenExtensions;

   msaa_samples_array[0] = 0;
--- a/src/mesa/drivers/dri/intel/intel_screen.h
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -116,6 +116,9 @@ struct intel_screen

   bool kernel_has_gen7_sol_reset;

+   bool hw_has_llc;
+   bool hw_has_swizzling;
+
   bool no_vbo;
   dri_bufmgr *bufmgr;
   struct _mesa_HashTable *named_regions;
--- a/src/mesa/drivers/dri/intel/intel_span.c
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -65,7 +65,7 @@
 *    mesa: Fix return type of  _mesa_get_format_bytes() (#37351)
 */
 intptr_t
-intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
+intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled)
 {
   uint32_t tile_size = 4096;
   uint32_t tile_width = 64;
@@ -90,22 +90,16 @@ intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y)
               +   2 * (byte_y % 2)
               +   1 * (byte_x % 2);

-   /*
-    * Errata for Gen5:
-    *
-    * An additional offset is needed which is not documented in the PRM.
-    *
-    * if ((byte_x / 8) % 2 == 1) {
-    *    if ((byte_y / 8) % 2) == 0) {
-    *       u += 64;
-    *    } else {
-    *       u -= 64;
-    *    }
-    * }
-    *
-    * The offset is expressed more tersely as
-    * u += ((int) x & 0x8) * (8 - (((int) y & 0x8) << 1));
-    */
+   if (swizzled) {
+      /* adjust for bit6 swizzling */
+      if (((byte_x / 8) % 2) == 1) {
+	 if (((byte_y / 8) % 2) == 0) {
+	    u += 64;
+	 } else {
+	    u -= 64;
+	 }
+      }
+   }

   return u;
 }
--- a/src/mesa/drivers/dri/intel/intel_span.h
+++ b/src/mesa/drivers/dri/intel/intel_span.h
@@ -38,6 +38,6 @@ extern void intelSpanRenderStart(struct gl_context * ctx);

 void intel_map_vertex_shader_textures(struct gl_context *ctx);
 void intel_unmap_vertex_shader_textures(struct gl_context *ctx);
-intptr_t intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y);
+intptr_t intel_offset_S8(uint32_t stride, uint32_t x, uint32_t y, bool swizzled);

 #endif
--- a/src/mesa/main/bufferobj.c
+++ b/src/mesa/main/bufferobj.c
@@ -1159,17 +1159,17 @@ _mesa_GetBufferParameterivARB(GLenum target, GLenum pname, GLint *params)
      *params = _mesa_bufferobj_mapped(bufObj);
      return;
   case GL_BUFFER_ACCESS_FLAGS:
-      if (ctx->VersionMajor < 3)
+      if (!ctx->Extensions.ARB_map_buffer_range)
         goto invalid_pname;
      *params = bufObj->AccessFlags;
      return;
   case GL_BUFFER_MAP_OFFSET:
-      if (ctx->VersionMajor < 3)
+      if (!ctx->Extensions.ARB_map_buffer_range)
         goto invalid_pname;
      *params = (GLint) bufObj->Offset;
      return;
   case GL_BUFFER_MAP_LENGTH:
-      if (ctx->VersionMajor < 3)
+      if (!ctx->Extensions.ARB_map_buffer_range)
         goto invalid_pname;
      *params = (GLint) bufObj->Length;
      return;
@@ -1210,7 +1210,7 @@ _mesa_GetBufferParameteri64v(GLenum target, GLenum pname, GLint64 *params)
      *params = simplified_access_mode(bufObj->AccessFlags);
      return;
   case GL_BUFFER_ACCESS_FLAGS:
-      if (ctx->VersionMajor < 3)
+      if (!ctx->Extensions.ARB_map_buffer_range)
         goto invalid_pname;
      *params = bufObj->AccessFlags;
      return;
@@ -1218,12 +1218,12 @@ _mesa_GetBufferParameteri64v(GLenum target, GLenum pname, GLint64 *params)
      *params = _mesa_bufferobj_mapped(bufObj);
      return;
   case GL_BUFFER_MAP_OFFSET:
-      if (ctx->VersionMajor < 3)
+      if (!ctx->Extensions.ARB_map_buffer_range)
         goto invalid_pname;
      *params = bufObj->Offset;
      return;
   case GL_BUFFER_MAP_LENGTH:
-      if (ctx->VersionMajor < 3)
+      if (!ctx->Extensions.ARB_map_buffer_range)
         goto invalid_pname;
      *params = bufObj->Length;
      return;
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -5254,3 +5254,94 @@ _mesa_unpack_image( GLuint dimensions,
   }
 }

+
+
+/**
+ * Rebase the first n float RGBA pixels of rgba, in place, to baseFormat.
+ *
+ * If we unpack colors from a luminance surface, we'll get pixel colors
+ * such as (l,l,l,a).  _mesa_pack_rgba_span_float(format=GL_LUMINANCE)
+ * computes L=R+G+B before packing, so we'd accidentally store 3*l.
+ * Rebasing (l,l,l,a) to (l,0,0,a) compensates for that.  Per base format:
+ *   GL_ALPHA:                    R=G=B=0, A is kept
+ *   GL_LUMINANCE, GL_INTENSITY:  G=B=0, A=1, R is kept
+ *   GL_LUMINANCE_ALPHA:          G=B=0, R and A are kept
+ *   any other base format:       the pixels are left untouched
+ *
+ * We also need to do this when the actual surface format does not match
+ * the logical surface format, e.g. the user requests a GL_LUMINANCE
+ * texture but the driver stores it as RGBA: again we get (l,l,l,a).
+ */
+void
+_mesa_rebase_rgba_float(GLuint n, GLfloat rgba[][4], GLenum baseFormat)
+{
+   GLuint i;
+
+   switch (baseFormat) {
+   case GL_ALPHA:
+      for (i = 0; i < n; i++) {
+         rgba[i][RCOMP] = 0.0F;
+         rgba[i][GCOMP] = 0.0F;
+         rgba[i][BCOMP] = 0.0F;
+      }
+      break;
+   case GL_INTENSITY:
+      /* fall-through: intensity is rebased exactly like luminance */
+   case GL_LUMINANCE:
+      for (i = 0; i < n; i++) {
+         rgba[i][GCOMP] = 0.0F;
+         rgba[i][BCOMP] = 0.0F;
+         rgba[i][ACOMP] = 1.0F;
+      }
+      break;
+   case GL_LUMINANCE_ALPHA:
+      for (i = 0; i < n; i++) {
+         rgba[i][GCOMP] = 0.0F;
+         rgba[i][BCOMP] = 0.0F;
+      }
+      break;
+   default:
+      /* no-op: every other base format is passed through unchanged */
+      ;
+   }
+}
+
+
+/**
+ * As _mesa_rebase_rgba_float(), but GLuint components: ALPHA zeroes R,G,B;
+ * LUMINANCE and INTENSITY zero G,B and set A=1; LUMINANCE_ALPHA zeroes G,B.
+ */
+void
+_mesa_rebase_rgba_uint(GLuint n, GLuint rgba[][4], GLenum baseFormat)
+{
+   GLuint i;
+
+   switch (baseFormat) {
+   case GL_ALPHA:
+      for (i = 0; i < n; i++) {
+         rgba[i][RCOMP] = 0;
+         rgba[i][GCOMP] = 0;
+         rgba[i][BCOMP] = 0;
+      }
+      break;
+   case GL_INTENSITY:
+      /* fall-through: intensity is rebased exactly like luminance */
+   case GL_LUMINANCE:
+      for (i = 0; i < n; i++) {
+         rgba[i][GCOMP] = 0;
+         rgba[i][BCOMP] = 0;
+         rgba[i][ACOMP] = 1;
+      }
+      break;
+   case GL_LUMINANCE_ALPHA:
+      for (i = 0; i < n; i++) {
+         rgba[i][GCOMP] = 0;
+         rgba[i][BCOMP] = 0;
+      }
+      break;
+   default:
+      /* no-op: every other base format is passed through unchanged */
+      ;
+   }
+}
+
+
--- a/src/mesa/main/pack.h
+++ b/src/mesa/main/pack.h
@@ -149,4 +149,11 @@ _mesa_pack_rgba_span_int(struct gl_context *ctx, GLuint n, GLuint rgba[][4],
                         GLenum dstFormat, GLenum dstType,
                         GLvoid *dstAddr);

+
+extern void
+_mesa_rebase_rgba_float(GLuint n, GLfloat rgba[][4], GLenum baseFormat);
+
+extern void
+_mesa_rebase_rgba_uint(GLuint n, GLuint rgba[][4], GLenum baseFormat);
+
 #endif
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -218,6 +218,16 @@ fast_read_rgba_pixels_memcpy( struct gl_context *ctx,
      return GL_FALSE;
   }

+   /* If the format is unsigned normalized then we can ignore clamping
+    * because the values are already in the range [0,1] so it won't
+    * have any effect anyway.
+    */
+   if (_mesa_get_format_datatype(rb->Format) == GL_UNSIGNED_NORMALIZED)
+      transferOps &= ~IMAGE_CLAMP_BIT;
+
+   if (transferOps)
+      return GL_FALSE;
+
   dstStride = _mesa_image_row_stride(packing, width, format, type);
   dst = (GLubyte *) _mesa_image_address2d(packing, pixels, width, height,
 					   format, type, 0, 0);
@@ -274,10 +284,14 @@ slow_read_rgba_pixels( struct gl_context *ctx,
   for (j = 0; j < height; j++) {
      if (_mesa_is_integer_format(format)) {
 	 _mesa_unpack_uint_rgba_row(rbFormat, width, map, (GLuint (*)[4]) rgba);
+         _mesa_rebase_rgba_uint(width, (GLuint (*)[4]) rgba,
+                                rb->_BaseFormat);
 	 _mesa_pack_rgba_span_int(ctx, width, (GLuint (*)[4]) rgba, format,
                                  type, dst);
      } else {
 	 _mesa_unpack_rgba_row(rbFormat, width, map, (GLfloat (*)[4]) rgba);
+         _mesa_rebase_rgba_float(width, (GLfloat (*)[4]) rgba,
+                                 rb->_BaseFormat);
 	 _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba, format,
                                    type, dst, packing, transferOps);
      }
@@ -313,13 +327,11 @@ read_rgba_pixels( struct gl_context *ctx,
      transferOps |= IMAGE_CLAMP_BIT;
   }

-   if (!transferOps) {
-      /* Try the optimized paths first. */
-      if (fast_read_rgba_pixels_memcpy(ctx, x, y, width, height,
-				       format, type, pixels, packing,
-				       transferOps)) {
-	 return;
-      }
+   /* Try the optimized paths first. */
+   if (fast_read_rgba_pixels_memcpy(ctx, x, y, width, height,
+                                    format, type, pixels, packing,
+                                    transferOps)) {
+      return;
   }

   slow_read_rgba_pixels(ctx, x, y, width, height,
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -275,13 +275,8 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,

   if (baseFormat == GL_LUMINANCE ||
       baseFormat == GL_LUMINANCE_ALPHA) {
-      /* Set green and blue to zero since the pack function here will
-       * compute L=R+G+B.
-       */
-      GLuint i;
-      for (i = 0; i < width * height; i++) {
-         tempImage[i * 4 + GCOMP] = tempImage[i * 4 + BCOMP] = 0.0f;
-      }
+      _mesa_rebase_rgba_float(width * height, (GLfloat (*)[4]) tempImage,
+                              baseFormat);
   }

   srcRow = tempImage;
@@ -312,6 +307,8 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
   const gl_format texFormat =
      _mesa_get_srgb_format_linear(texImage->TexFormat);
   const GLuint width = texImage->Width;
+   const GLenum destBaseFormat = _mesa_base_tex_format(ctx, format);
+   GLenum rebaseFormat = GL_NONE;
   GLuint height = texImage->Height;
   GLuint depth = texImage->Depth;
   GLuint img, row;
@@ -332,6 +329,28 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,
      height = 1;
   }

+   if (texImage->_BaseFormat == GL_LUMINANCE ||
+       texImage->_BaseFormat == GL_INTENSITY ||
+       texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
+      /* If a luminance (or intensity) texture is read back as RGB(A), the
+       * returned value should be (L,0,0,1), not (L,L,L,1).  Set rebaseFormat
+       * here to get G=B=0.
+       */
+      rebaseFormat = texImage->_BaseFormat;
+   }
+   else if ((texImage->_BaseFormat == GL_RGBA ||
+             texImage->_BaseFormat == GL_RGB) &&
+            (destBaseFormat == GL_LUMINANCE ||
+             destBaseFormat == GL_LUMINANCE_ALPHA ||
+             destBaseFormat == GL_LUMINANCE_INTEGER_EXT ||
+             destBaseFormat == GL_LUMINANCE_ALPHA_INTEGER_EXT)) {
+      /* If we're reading back an RGB(A) texture as luminance then we need
+       * to return L=tex(R).  Note, that's different from glReadPixels which
+       * returns L=R+G+B.
+       */
+      rebaseFormat = GL_LUMINANCE_ALPHA; /* this covers GL_LUMINANCE too */
+   }
+
   for (img = 0; img < depth; img++) {
      GLubyte *srcMap;
      GLint rowstride;
@@ -349,76 +368,14 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,

 	    if (is_integer) {
 	       _mesa_unpack_uint_rgba_row(texFormat, width, src, rgba_uint);
-
-	       if (texImage->_BaseFormat == GL_ALPHA) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba_uint[col][RCOMP] = 0;
-		     rgba_uint[col][GCOMP] = 0;
-		     rgba_uint[col][BCOMP] = 0;
-		  }
-	       }
-	       else if (texImage->_BaseFormat == GL_LUMINANCE) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba_uint[col][GCOMP] = 0;
-		     rgba_uint[col][BCOMP] = 0;
-		     rgba_uint[col][ACOMP] = 1;
-		  }
-	       }
-	       else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba_uint[col][GCOMP] = 0;
-		     rgba_uint[col][BCOMP] = 0;
-		  }
-	       }
-	       else if (texImage->_BaseFormat == GL_INTENSITY) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba_uint[col][GCOMP] = 0;
-		     rgba_uint[col][BCOMP] = 0;
-		     rgba_uint[col][ACOMP] = 1;
-		  }
-	       }
-
+               if (rebaseFormat)
+                  _mesa_rebase_rgba_uint(width, rgba_uint, rebaseFormat);
 	       _mesa_pack_rgba_span_int(ctx, width, rgba_uint,
 					format, type, dest);
 	    } else {
 	       _mesa_unpack_rgba_row(texFormat, width, src, rgba);
-
-	       if (texImage->_BaseFormat == GL_ALPHA) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba[col][RCOMP] = 0.0F;
-		     rgba[col][GCOMP] = 0.0F;
-		     rgba[col][BCOMP] = 0.0F;
-		  }
-	       }
-	       else if (texImage->_BaseFormat == GL_LUMINANCE) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba[col][GCOMP] = 0.0F;
-		     rgba[col][BCOMP] = 0.0F;
-		     rgba[col][ACOMP] = 1.0F;
-		  }
-	       }
-	       else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba[col][GCOMP] = 0.0F;
-		     rgba[col][BCOMP] = 0.0F;
-		  }
-	       }
-	       else if (texImage->_BaseFormat == GL_INTENSITY) {
-		  GLint col;
-		  for (col = 0; col < width; col++) {
-		     rgba[col][GCOMP] = 0.0F;
-		     rgba[col][BCOMP] = 0.0F;
-		     rgba[col][ACOMP] = 1.0F;
-		  }
-	       }
-
+               if (rebaseFormat)
+                  _mesa_rebase_rgba_float(width, rgba, rebaseFormat);
 	       _mesa_pack_rgba_span_float(ctx, width, (GLfloat (*)[4]) rgba,
 					  format, type, dest,
 					  &ctx->Pack, transferOps);
--- a/src/mesa/main/texstate.c
+++ b/src/mesa/main/texstate.c
@@ -682,20 +682,25 @@ _mesa_update_texture( struct gl_context *ctx, GLuint new_state )
 static GLboolean
 alloc_proxy_textures( struct gl_context *ctx )
 {
+   /* NOTE: these values must be in the same order as the TEXTURE_x_INDEX
+    * values!
+    */
   static const GLenum targets[] = {
-      GL_TEXTURE_1D,
-      GL_TEXTURE_2D,
-      GL_TEXTURE_3D,
-      GL_TEXTURE_CUBE_MAP_ARB,
-      GL_TEXTURE_RECTANGLE_NV,
-      GL_TEXTURE_1D_ARRAY_EXT,
-      GL_TEXTURE_2D_ARRAY_EXT,
      GL_TEXTURE_BUFFER,
-      GL_TEXTURE_EXTERNAL_OES
+      GL_TEXTURE_2D_ARRAY_EXT,
+      GL_TEXTURE_1D_ARRAY_EXT,
+      GL_TEXTURE_EXTERNAL_OES,
+      GL_TEXTURE_CUBE_MAP_ARB,
+      GL_TEXTURE_3D,
+      GL_TEXTURE_RECTANGLE_NV,
+      GL_TEXTURE_2D,
+      GL_TEXTURE_1D,
   };
   GLint tgt;

   STATIC_ASSERT(Elements(targets) == NUM_TEXTURE_TARGETS);
+   assert(targets[TEXTURE_2D_INDEX] == GL_TEXTURE_2D);
+   assert(targets[TEXTURE_CUBE_INDEX] == GL_TEXTURE_CUBE_MAP);

   for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) {
      if (!(ctx->Texture.ProxyTex[tgt]
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -203,10 +203,18 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
      const union gl_constant_value *const src =
 	 &uni->storage[offset * elements];

-      unsigned bytes = sizeof(uni->storage[0]) * elements;
-      if (bytes > (unsigned) bufSize) {
-	 elements = bufSize / sizeof(uni->storage[0]);
-	 bytes = bufSize;
+      assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT ||
+             returnType == GLSL_TYPE_UINT);
+      /* The three (currently) supported types all have the same size,
+       * which is of course the same as their union. That'll change
+       * with glGetUniformdv()...
+       */
+      unsigned bytes = sizeof(src[0]) * elements;
+      if (bufSize < 0 || bytes > (unsigned) bufSize) {
+	 _mesa_error( ctx, GL_INVALID_OPERATION,
+	             "glGetnUniform*vARB(out of bounds: bufSize is %d,"
+	             " but %u bytes are required)", bufSize, bytes );
+	 return;
      }

      /* If the return type and the uniform's native type are "compatible,"
--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -479,7 +479,7 @@ _mesa_GetnUniformdvARB(GLhandleARB program, GLint location,
   (void) params;

   /*
-   _mesa_get_uniform(ctx, program, location, bufSize, GL_DOUBLE, params);
+   _mesa_get_uniform(ctx, program, location, bufSize, GLSL_TYPE_DOUBLE, params);
   */
   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformdvARB"
               "(GL_ARB_gpu_shader_fp64 not implemented)");
--- a/src/mesa/main/version.h
+++ b/src/mesa/main/version.h
@@ -34,8 +34,8 @@ struct gl_context;
 /* Mesa version */
 #define MESA_MAJOR 8
 #define MESA_MINOR 0
-#define MESA_PATCH 0
-#define MESA_VERSION_STRING "8.0"
+#define MESA_PATCH 2
+#define MESA_VERSION_STRING "8.0.2"

 /* To make version comparison easy */
 #define MESA_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -178,7 +178,8 @@ st_BlitFramebuffer(struct gl_context *ctx,
      st->pipe->render_condition(st->pipe, NULL, 0);
   }

-   if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers) {
+   if (readFB->Visual.sampleBuffers > drawFB->Visual.sampleBuffers &&
+       readFB->Visual.samples > 1) {
      struct pipe_resolve_info info;

      if (dstX0 < dstX1) {
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2978,7 +2978,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
   if (!tempWrites) {
      return;
   }
-   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
   memset(outputWrites, 0, sizeof(outputWrites));

   foreach_iter(exec_list_iterator, iter, this->instructions) {
--- a/src/mesa/swrast/s_context.h
+++ b/src/mesa/swrast/s_context.h
@@ -175,6 +175,9 @@ struct swrast_renderbuffer
   /** These fields are only valid while buffer is mapped for rendering */
   GLubyte *Map;
   GLint RowStride;    /**< in bytes */
+
+   /** For span rendering */
+   GLenum ColorType;
 };


--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -40,7 +40,8 @@ GLboolean
 _swrast_use_fragment_program(struct gl_context *ctx)
 {
   struct gl_fragment_program *fp = ctx->FragmentProgram._Current;
-   return fp && fp != ctx->FragmentProgram._TexEnvProgram;
+   return fp && !(fp == ctx->FragmentProgram._TexEnvProgram
+                  && fp->Base.NumInstructions == 0);
 }

 /**
--- a/src/mesa/swrast/s_renderbuffer.c
+++ b/src/mesa/swrast/s_renderbuffer.c
@@ -615,8 +615,31 @@ unmap_attachment(struct gl_context *ctx,

   srb->Map = NULL;
 }
- 
- 
+
+
+/**
+ * Determine what type to use (ubyte vs. float) for span colors for the
+ * given renderbuffer and store it in the swrast renderbuffer's ColorType.
+ * See also _swrast_write_rgba_span(), which reads ColorType back.
+ * NOTE(review): rb->Format is read unconditionally; confirm rb != NULL.
+ */
+static void
+find_renderbuffer_colortype(struct gl_renderbuffer *rb)
+{
+   struct swrast_renderbuffer *srb = swrast_renderbuffer(rb);
+   GLuint rbMaxBits = _mesa_get_format_max_bits(rb->Format);
+   GLenum rbDatatype = _mesa_get_format_datatype(rb->Format);
+
+   if (rbDatatype == GL_UNSIGNED_NORMALIZED && rbMaxBits <= 8) {
+      /* unsigned-normalized and at most 8 bits: GLubyte spans suffice */
+      srb->ColorType = GL_UNSIGNED_BYTE;
+   }
+   else {
+      /* any other datatype, or more than 8 bits: use float spans */
+      srb->ColorType = GL_FLOAT;
+   }
+}
+
+
 /**
 * Map the renderbuffers we'll use for tri/line/point rendering.
 */
@@ -641,6 +664,7 @@ _swrast_map_renderbuffers(struct gl_context *ctx)

   for (buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
      map_attachment(ctx, fb, fb->_ColorDrawBufferIndexes[buf]);
+      find_renderbuffer_colortype(fb->_ColorDrawBuffers[buf]);
   }
 }
 
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -1320,15 +1320,15 @@ _swrast_write_rgba_span( struct gl_context *ctx, SWspan *span)

         if (rb) {
            GLchan rgbaSave[MAX_WIDTH][4];
+            struct swrast_renderbuffer *srb = swrast_renderbuffer(rb);
+            GLenum colorType = srb->ColorType;

-	    GLenum datatype;
-	    GLuint comps;
+            assert(colorType == GL_UNSIGNED_BYTE ||
+                   colorType == GL_FLOAT);

-	    _mesa_format_to_type_and_comps(rb->Format, &datatype, &comps);
-
-            /* set span->array->rgba to colors for render buffer's datatype */
-            if (datatype != span->array->ChanType) {
-               convert_color_type(span, datatype, 0);
+            /* set span->array->rgba to colors for renderbuffer's datatype */
+            if (span->array->ChanType != colorType) {
+               convert_color_type(span, colorType, 0);
            }
            else {
               if (span->array->ChanType == GL_UNSIGNED_BYTE) {
--- a/src/mesa/tnl/t_context.c
+++ b/src/mesa/tnl/t_context.c
@@ -151,8 +151,7 @@ _tnl_InvalidateState( struct gl_context *ctx, GLuint new_state )
   if (ctx->RenderMode == GL_FEEDBACK)
      tnl->render_inputs_bitset |= BITFIELD64_BIT(_TNL_ATTRIB_TEX0);

-   if (ctx->Point._Attenuated ||
-       (ctx->VertexProgram._Enabled && ctx->VertexProgram.PointSizeEnabled))
+   if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
      tnl->render_inputs_bitset |= BITFIELD64_BIT(_TNL_ATTRIB_POINTSIZE);

   /* check for varying vars which are written by the vertex program */