docs: Update 10.3 release notes

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Bump version to 10.3 (final)
2014-09-19 19:43:01 +01:00 · 2014-09-19 19:27:45 +01:00 · 2014-09-16 22:18:34 +01:00 · 2014-09-16 22:17:58 +01:00 · 2014-09-16 22:17:40 +01:00 · 2014-09-16 22:16:58 +01:00
99 changed files with 1689 additions and 439 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -64,14 +64,13 @@ IGNORE_FILES = \

 parsers: configure
 	$(MAKE) -C src/glsl glsl_parser.cpp glsl_parser.h glsl_lexer.cpp glcpp/glcpp-lex.c glcpp/glcpp-parse.c glcpp/glcpp-parse.h
-	$(MAKE) -C src/mesa program/lex.yy.c program/program_parse.tab.c program/program_parse.tab.h

 # Everything for new a Mesa release:
 ARCHIVES = $(PACKAGE_NAME).tar.gz \
 	$(PACKAGE_NAME).tar.bz2 \
 	$(PACKAGE_NAME).zip

-tarballs: md5
+tarballs: checksums
 	rm -f ../$(PACKAGE_DIR) $(PACKAGE_NAME).tar

 manifest.txt: .git
@@ -98,9 +97,9 @@ $(PACKAGE_NAME).zip: parsers ../$(PACKAGE_DIR) manifest.txt
 	zip -q -@ $(PACKAGE_NAME).zip < $(PACKAGE_DIR)/manifest.txt ; \
 	mv $(PACKAGE_NAME).zip $(PACKAGE_DIR)

-md5: $(ARCHIVES)
-	@-md5sum $(PACKAGE_NAME).tar.gz
-	@-md5sum $(PACKAGE_NAME).tar.bz2
-	@-md5sum $(PACKAGE_NAME).zip
+checksums: $(ARCHIVES)
+	@-sha256sum $(PACKAGE_NAME).tar.gz
+	@-sha256sum $(PACKAGE_NAME).tar.bz2
+	@-sha256sum $(PACKAGE_NAME).zip

 .PHONY: tarballs md5
--- a/2
+++ b/2
@@ -1 +1 @@
-10.3.0-devel
+10.3.0
--- a/configure.ac
+++ b/configure.ac
@@ -355,6 +355,24 @@ AC_LINK_IFELSE(
 LDFLAGS=$save_LDFLAGS
 AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes")

+dnl
+dnl Check if linker supports dynamic list files
+dnl
+AC_MSG_CHECKING([if the linker supports --dynamic-list])
+save_LDFLAGS=$LDFLAGS
+LDFLAGS="$LDFLAGS -Wl,--dynamic-list=conftest.dyn"
+cat > conftest.dyn <<EOF
+{
+	radeon_drm_winsys_create;
+};
+EOF
+AC_LINK_IFELSE(
+    [AC_LANG_SOURCE([int main() { return 0;}])],
+    [have_ld_dynamic_list=yes;AC_MSG_RESULT(yes)],
+    [have_ld_dynamic_list=no; AC_MSG_RESULT(no)])
+LDFLAGS=$save_LDFLAGS
+AM_CONDITIONAL(HAVE_LD_DYNAMIC_LIST, test "$have_ld_dynamic_list" = "yes")
+
 dnl
 dnl compatibility symlinks
 dnl
@@ -802,6 +820,11 @@ fi

 AM_CONDITIONAL(HAVE_SHARED_GLAPI, test "x$enable_shared_glapi" = xyes)

+# Build the pipe-drivers as separate libraries/modules.
+# Do not touch this unless you know what you are doing.
+# XXX: Expose via configure option ?
+enable_shared_pipe_drivers=no
+
 dnl
 dnl Driver specific build directories
 dnl
@@ -822,7 +845,7 @@ esac
 if test "x$enable_dri" = xyes; then
    GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS sw/dri"
    GALLIUM_STATE_TRACKERS_DIRS="dri $GALLIUM_STATE_TRACKERS_DIRS"
-    enable_gallium_loader=yes
+    enable_gallium_loader="$enable_shared_pipe_drivers"
 fi

 if test "x$enable_gallium_osmesa" = xyes; then
@@ -1295,7 +1318,8 @@ if test "x$enable_gallium_egl" = xyes; then

    GALLIUM_STATE_TRACKERS_DIRS="egl $GALLIUM_STATE_TRACKERS_DIRS"
    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS egl-static"
-#    enable_gallium_loader=yes
+# XXX: Uncomment once converted to use static/shared pipe-drivers
+#    enable_gallium_loader=$enable_shared_pipe_drivers
 fi
 AM_CONDITIONAL(HAVE_GALLIUM_EGL, test "x$enable_gallium_egl" = xyes)

@@ -1324,7 +1348,7 @@ if test "x$enable_gallium_gbm" = xyes; then

    GALLIUM_STATE_TRACKERS_DIRS="gbm $GALLIUM_STATE_TRACKERS_DIRS"
    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS gbm"
-    enable_gallium_loader=yes
+    enable_gallium_loader=$enable_shared_pipe_drivers
 fi
 AM_CONDITIONAL(HAVE_GALLIUM_GBM, test "x$enable_gallium_gbm" = xyes)

@@ -1341,7 +1365,7 @@ if test "x$enable_xa" = xyes; then
          Example: ./configure --enable-xa --with-gallium-drivers=svga...])
    fi
    GALLIUM_STATE_TRACKERS_DIRS="xa $GALLIUM_STATE_TRACKERS_DIRS"
-    enable_gallium_loader=yes
+    enable_gallium_loader=$enable_shared_pipe_drivers
 fi
 AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes)

@@ -1389,7 +1413,7 @@ fi
 if test "x$enable_xvmc" = xyes; then
    PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED x11-xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
    GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS xvmc"
-    enable_gallium_loader=yes
+    enable_gallium_loader=$enable_shared_pipe_drivers
 fi
 AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes)

@@ -1397,14 +1421,14 @@ if test "x$enable_vdpau" = xyes; then
    PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED x11-xcb xcb-dri2 >= $XCBDRI2_REQUIRED],
                      [VDPAU_LIBS="`$PKG_CONFIG --libs x11-xcb xcb-dri2`"])
    GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS vdpau"
-    enable_gallium_loader=yes
+    enable_gallium_loader=$enable_shared_pipe_drivers
 fi
 AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)

 if test "x$enable_omx" = xyes; then
    PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED x11-xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
    GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS omx"
-    enable_gallium_loader=yes
+    enable_gallium_loader=$enable_shared_pipe_drivers
 fi
 AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)

@@ -1456,6 +1480,7 @@ if test "x$enable_opencl" = xyes; then

    GALLIUM_STATE_TRACKERS_DIRS="$GALLIUM_STATE_TRACKERS_DIRS clover"
    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS opencl"
+    # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers
    enable_gallium_loader=yes

    if test "x$enable_opencl_icd" = xyes; then
@@ -1630,6 +1655,7 @@ strip_unwanted_llvm_flags() {
    # Use \> (marks the end of the word)
    echo `$1` | sed \
 	-e 's/-DNDEBUG\>//g' \
+	-e 's/-D_GNU_SOURCE\>//g' \
 	-e 's/-pedantic\>//g' \
 	-e 's/-Wcovered-switch-default\>//g' \
 	-e 's/-O.\>//g' \
@@ -1756,6 +1782,7 @@ dnl
 dnl Gallium Tests
 dnl
 if test "x$enable_gallium_tests" = xyes; then
+    # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers
    enable_gallium_loader=yes
 fi
 AM_CONDITIONAL(HAVE_GALLIUM_TESTS, test "x$enable_gallium_tests" = xyes)
@@ -1889,6 +1916,9 @@ if test -n "$with_gallium_drivers"; then
        case "x$driver" in
        xsvga)
            HAVE_GALLIUM_SVGA=yes
+            if test "x$have_libdrm" != xyes; then
+                AC_MSG_ERROR([Building svga requires libdrm >= $LIBDRM_REQUIRED])
+            fi
            GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS svga softpipe"
            gallium_require_drm_loader
            gallium_check_st "svga/drm" "dri/vmwgfx" "xa/vmwgfx"
@@ -2051,9 +2081,7 @@ AM_CONDITIONAL(NEED_GALLIUM_SOFTPIPE_DRIVER, test "x$HAVE_GALLIUM_SVGA" = xyes -
 AM_CONDITIONAL(NEED_GALLIUM_LLVMPIPE_DRIVER, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes \
                                                  && test "x$MESA_LLVM" = x1)

-# Enable static gallium targets for now.
-# Do not touch this unless you know what you are doing.
-AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "xyes" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)

 # NOTE: anything using xcb or other client side libs ends up in separate
 #       _CLIENT variables.  The pipe loader is built in two variants,
--- a/docs/relnotes/10.3.html
+++ b/docs/relnotes/10.3.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 10.3 Release Notes / TBD</h1>
+<h1>Mesa 10.3 Release Notes / September 19, 2014</h1>

 <p>
 Mesa 10.3 is a new development release.
@@ -75,7 +75,249 @@ DRM drivers that don't have a full-fledged GEM (such as qxl or simpledrm)</li>

 <h2>Bug fixes</h2>

-TBD.
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=50754">Bug 50754</a> - Building 32 bit mesa on 64 bit OS fails since change for automake</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=53617">Bug 53617</a> - [llvmpipe] piglit fbo-depthtex regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=54372">Bug 54372</a> - GLX_INTEL_swap_event crashes driver when swapping window buffers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=56127">Bug 56127</a> - [ILK bisected]unigine-sanctruary performance reduced by 98%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66184">Bug 66184</a> - src/mesa/state_tracker/st_glsl_to_tgsi.cpp:3216:simplify_cmp: Assertion `inst-&gt;dst.index &lt; 4096' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66452">Bug 66452</a> - JUNIPER UVD accelerated playback of WMV3 streams does not work</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68365">Bug 68365</a> - [SNB Bisected]Piglit spec_ARB_framebuffer_object_fbo-blit-stretch  fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=70441">Bug 70441</a> - [Gen4-5 clip] Piglit spec_OpenGL_1.1_polygon-offset hits (execsize &gt;= width) assertion</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73846">Bug 73846</a> - [llvmpipe] lp_test_format fails with llvm-3.5svn &gt;= r199602</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74005">Bug 74005</a> - [i965 Bisected]Piglit/glx_glx-make-glxdrawable-current fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74863">Bug 74863</a> - [r600g] HyperZ broken on RV770 and CYPRESS (Left 4 Dead 2 trees corruption) bisected!</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75010">Bug 75010</a> - clang: error: unknown argument: '-fstack-protector-strong'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75478">Bug 75478</a> - [BDW]Some Piglit and Ogles2conform cases cause GPU hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75664">Bug 75664</a> - Unigine Valley &amp; Heaven &quot;error: syntax error, unexpected EXTENSION, expecting $end&quot; IVB HD4000</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=75878">Bug 75878</a> - [BDW] GPU hang running Raytracer WebGL demo</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76188">Bug 76188</a> - EGL_EXT_image_dma_buf_import fd ownership is incorrect</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76223">Bug 76223</a> - [radeonsi] luxmark segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=76939">Bug 76939</a> - [BDW] GPU hang when running “Metro:Last Light “ /“Crusader Kings II”</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77245">Bug 77245</a> - Bogus GL_ARB_explicit_attrib_location layout identifier warnings</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77493">Bug 77493</a> - lp_test_arit fails with llvm &gt;= llvm-3.5svn r206094</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77703">Bug 77703</a> - [ILK Bisected]Piglit glean_texCombine4 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77704">Bug 77704</a> - [IVB/HSW Bisected]Ogles3conform GL3Tests_shadow_shadow_execution_frag.test fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77705">Bug 77705</a> - [SNB/IVB/HSW/BYT/BDW Bisected]Ogles3conform GL3Tests/packed_pixels/packed_pixels_pixelstore.test  segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77707">Bug 77707</a> - [ILK Bisected]Ogles2conform GL_sin_sin_float_frag_xvary.test fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77740">Bug 77740</a> - i965: Relax accumulator dependency scheduling on Gen &lt; 6</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77852">Bug 77852</a> - [BDW]Piglit spec_ARB_framebuffer_object_fbo-drawbuffers-none_glBlitFramebuffer fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77856">Bug 77856</a> - [BDW]Piglit spec_OpenGL_3.0_clearbuffer-mixed-format fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77865">Bug 77865</a> - [BDW] Many Ogles3conform framebuffer_blit cases fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78225">Bug 78225</a> - Compile error due to undefined reference to `gbm_dri_backend', fix attached</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78258">Bug 78258</a> - make check link_varyings.gl_ClipDistance failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78403">Bug 78403</a> - query_renderer_implementation_unittest.cpp:144:4: error: expected primary-expression before ‘.’ token</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78468">Bug 78468</a> - Compiling of shader gets stuck in infinite loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78537">Bug 78537</a> - no anisotropic filtering in a native Half-Life 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78546">Bug 78546</a> - [swrast] piglit copyteximage-border regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78581">Bug 78581</a> - OpenCL: clBuildProgram prints error messages directly rather than storing them</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78648">Bug 78648</a> - Texture artifacts in Kerbal Space Program</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78665">Bug 78665</a> - macros in builtin_functions.cpp make invalid assumptions about M_PI definitions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78679">Bug 78679</a> - Gen4-5 code lost: runtime_check_aads_emit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78691">Bug 78691</a> - [G45 - Tesseract] Mesa 10.1.2 implementation error: Unsupported opcode 169872468 in FS</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78692">Bug 78692</a> - Football Manager 2014, gameplay rendered black &amp; white</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78716">Bug 78716</a> - Fix Mesa bugs for running Unreal Engine 4.1 Cave effects demo compiled for Linux</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78803">Bug 78803</a> - gallivm/lp_bld_debug.cpp:42:28: fatal error: llvm/IR/Module.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78842">Bug 78842</a> - [swrast] piglit fcc-read-after-clear copy rb regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78843">Bug 78843</a> - [swrast] piglit copyteximage 1D regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78872">Bug 78872</a> - [ILK Bisected]Piglit spec_ARB_depth_buffer_float_fbo-depthstencil-GL_DEPTH32F_STENCIL8-blit Aborted</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78875">Bug 78875</a> - [ILK Bisected]Webglc conformance/uniforms/uniform-default-values.html fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=78888">Bug 78888</a> - test_eu_compact.c:54:3: error: implicit declaration of function ‘brw_disasm’ [-Werror=implicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79029">Bug 79029</a> - INTEL_DEBUG=shader_time is full of lies</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79095">Bug 79095</a> - x86/common_x86.c:348:14: error: use of undeclared identifier 'bit_SSE4_1'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79115">Bug 79115</a> - glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, 0) doesn't unbind stencil buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79263">Bug 79263</a> - Linking error in egl_gallium.la when compiling 32 bit on multiarch</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79294">Bug 79294</a> - Xlib-based build broken on non x86/x86-64 architectures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79373">Bug 79373</a> - Non-const initializers for matrix and vector constructors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79382">Bug 79382</a> - build error: multiple definition of `loader_get_pci_id_for_fd'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79421">Bug 79421</a> - [llvmpipe] SIGSEGV src/gallium/drivers/llvmpipe/lp_rast_priv.h:218</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79440">Bug 79440</a> - prog_hash_table.c:146: undefined reference to `_mesa_error_no_memory'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79469">Bug 79469</a> - Commit e3cc0d90e14e62a0a787b6c07a6df0f5c84039be breaks unigine heaven</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79534">Bug 79534</a> - gen&lt;7 renders garbage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79616">Bug 79616</a> - L4D2 crash on startup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79724">Bug 79724</a> - switch statement type check</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79729">Bug 79729</a> - [i965] glClear on a multisample texture doesn't work</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79809">Bug 79809</a> - radeonsi: mouse cursor corruption using weston on AMD Kaveri</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79823">Bug 79823</a> - [NV30/gallium] Mozilla apps freeze on startup with nouveau-dri-10.2.1 libs on dual-screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79885">Bug 79885</a> - commit b52a530 (gallium/egl: st_profiles are build time decision, treat them as such) broke egl</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79903">Bug 79903</a> - [HSW Bisected]Some Piglit and Ogles2conform cases fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79907">Bug 79907</a> - Mesa 10.2.1 --enable-vdpau default=auto broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79948">Bug 79948</a> - [i965] Incorrect pixels when using discard and uniform loads</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80015">Bug 80015</a> - Transparency glitches in native Civilization 5 (Civ5) port</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80115">Bug 80115</a> - MESA_META_DRAW_BUFFERS induced GL_INVALID_VALUE errors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80211">Bug 80211</a> - [ILK/SNB Bisected]Piglit shaders_glsl-fs-copy-propagation-texcoords-1 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80247">Bug 80247</a> - Khronos conformance test  ES3-CTS.gtf.GL3Tests.transform_feedback.transform_feedback_vertex_id  fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80254">Bug 80254</a> - pipe_loader_sw.c:90: undefined reference to `dri_create_sw_winsys'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80541">Bug 80541</a> - [softpipe] piglit levelclamp regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80561">Bug 80561</a> - Incorrect implementation of some VDPAU APIs.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80614">Bug 80614</a> - [regression] Error in `omxregister-bellagio': munmap_chunk(): invalid pointer: 0x00007f5f76626dab</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80778">Bug 80778</a> - [bisected regression] piglit spec/glsl-1.50/compiler/incorrect-in-layout-qualifier-repeated-prim.geom</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80827">Bug 80827</a> - [radeonsi,R9 270X] Corruptions in window menus in KDE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80880">Bug 80880</a> - Unreal Engine 4 demos fail GLSL compiler assertion</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80991">Bug 80991</a> - [BDW]Piglit spec_ARB_sample_shading_builtin-gl-sample-mask_2 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81020">Bug 81020</a> - [radeonsi][regresssion] Wireframe of background rendered through objects in Half-Life 2: Episode 2 with MSAA enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81150">Bug 81150</a> - [SNB]Piglit spec_arb_shading_language_packing_execution_built-in-functions_fs-packSnorm4x8 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81157">Bug 81157</a> - [BDW]Piglit some spec_glsl-1.50_execution_built-in-functions* cases fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81450">Bug 81450</a> - [BDW]Piglit spec_glsl-1.30_execution_tex-miplevel-selection_textureGrad_1DArray cases intel_do_flush_locked failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81828">Bug 81828</a> - [BDW Bisected]Ogles3conform GL3Tests_packed_pixels_packed_pixels_pbo.test fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81834">Bug 81834</a> - TGSI constant buffer overrun causes assertion failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81857">Bug 81857</a> - [SNB+]Piglit spec_glsl-1.30_execution_switch_fs-default_last sporadically fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81967">Bug 81967</a> - [regression] Selections in Blender renders wrong</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82139">Bug 82139</a> - [r600g, bisected] multiple ubo piglit regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82159">Bug 82159</a> - No rule to make target `../../../../src/mesa/libmesa.la', needed by `collision'.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82255">Bug 82255</a> - [VP2] Chroma planes are vertically stretched during VDPAU playback</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82268">Bug 82268</a> - Add support for the OpenRISC architecture (or1k)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82428">Bug 82428</a> - [radeonsi,R9 270X] System lockup when using mplayer/mpv with VDPAU</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82472">Bug 82472</a> - piglit 16385-consecutive-chars regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82483">Bug 82483</a> - format_srgb.h:145: undefined reference to `util_format_srgb_to_linear_8unorm_table'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82517">Bug 82517</a> - [RADEONSI,VDPAU] SIGSEGV in map_msg_fb_buf called from ruvd_destroy, when closing a Tab with accelerated video player</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82534">Bug 82534</a> - src\egl\main\eglapi.h : fatal error LNK1107: invalid or corrupt file: cannot read at 0x2E02</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82536">Bug 82536</a> - u_current.h:72: undefined reference to `__imp__glapi_Dispatch'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82538">Bug 82538</a> - Super Maryo Chronicles fails with st/mesa assertion failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82539">Bug 82539</a> - vmw_screen_dri.lo In file included from vmw_screen_dri.c:41: vmwgfx_drm.h:32:17: error: drm.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82546">Bug 82546</a> - [regression] libOSMesa build failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82574">Bug 82574</a> - GLSL: opt_vectorize goes wrong on texture lookups</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82628">Bug 82628</a> - bisected: GALLIUM_HUD hangs radeon 7970M (PRIME)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82671">Bug 82671</a> - [r600g-evergreen][compute]Empty kernel execution causes crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82709">Bug 82709</a> - OpenCL not working on radeon hainan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82796">Bug 82796</a> - [IVB/BYT-M/HSW/BDW Bisected]Synmark2_v6.0_OglTerrainFlyInst/OglTerrainPanInst cannot run as image validation failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82804">Bug 82804</a> - unreal engine 4 rendering errors</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82814">Bug 82814</a> - glDrawBuffers(0, NULL) segfaults in _mesa_drawbuffers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82828">Bug 82828</a> - Regression: Crash in 3Dmark2001</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82846">Bug 82846</a> - [BDW Bisected] Gpu hang when running Lightsmark v2008/Warsow v1.0/Xonotic v0.7/unigine-demos</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82881">Bug 82881</a> - test_vec4_register_coalesce regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82882">Bug 82882</a> - [swrast] piglit glsl-fs-uniform-bool-1 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82929">Bug 82929</a> - [BDW Bisected]glxgears causes X hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82932">Bug 82932</a> - [SNB+ Bisected]Ogles3conform ES3-CTS.shaders.indexing.vector_subscript.vec3_static_loop_subscript_write_direct_read_vertex fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83046">Bug 83046</a> - [BDW bisected]] Warsow v1.0/Xonotic v0.7/Gputest v0.5_triangle_fullscreen/synmark2_v6/GLBenchmark v2.5.0/GLBenchmark v2.7.0/Ungine-demos performance reduced 30%~60%</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83079">Bug 83079</a> - [NVC0] Dota 2 (Linux native and Wine) crash with Nouveau Drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83081">Bug 83081</a> - [BDW Bisected]Piglit spec_ARB_sample_shading_builtin-gl-sample-mask_2 is core dumped</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83127">Bug 83127</a> - [ILK Bisected]Piglit glean_texCombine fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83355">Bug 83355</a> - FTBFS: src/mesa/program/program_lexer.l:122:64: error: unknown type name 'YYSTYPE'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83432">Bug 83432</a> - r600_query.c:269:r600_emit_query_end: Assertion `ctx-&gt;num_pipelinestat_queries &gt; 0' failed [Gallium HUD]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83468">Bug 83468</a> - [UBO] Using bool from UBO as if-statement condition asserts</li>
+
+</ul>

 <h2>Changes</h2>

--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -38,6 +38,7 @@ CHIPSET(0x6828, VERDE_6828, VERDE)
 CHIPSET(0x6829, VERDE_6829, VERDE)
 CHIPSET(0x682A, VERDE_682A, VERDE)
 CHIPSET(0x682B, VERDE_682B, VERDE)
+CHIPSET(0x682C, VERDE_682C, VERDE)
 CHIPSET(0x682D, VERDE_682D, VERDE)
 CHIPSET(0x682F, VERDE_682F, VERDE)
 CHIPSET(0x6830, VERDE_6830, VERDE)
@@ -54,8 +55,11 @@ CHIPSET(0x6600, OLAND_6600, OLAND)
 CHIPSET(0x6601, OLAND_6601, OLAND)
 CHIPSET(0x6602, OLAND_6602, OLAND)
 CHIPSET(0x6603, OLAND_6603, OLAND)
+CHIPSET(0x6604, OLAND_6604, OLAND)
+CHIPSET(0x6605, OLAND_6605, OLAND)
 CHIPSET(0x6606, OLAND_6606, OLAND)
 CHIPSET(0x6607, OLAND_6607, OLAND)
+CHIPSET(0x6608, OLAND_6608, OLAND)
 CHIPSET(0x6610, OLAND_6610, OLAND)
 CHIPSET(0x6611, OLAND_6611, OLAND)
 CHIPSET(0x6613, OLAND_6613, OLAND)
@@ -73,6 +77,8 @@ CHIPSET(0x666F, HAINAN_666F, HAINAN)

 CHIPSET(0x6640, BONAIRE_6640, BONAIRE)
 CHIPSET(0x6641, BONAIRE_6641, BONAIRE)
+CHIPSET(0x6646, BONAIRE_6646, BONAIRE)
+CHIPSET(0x6647, BONAIRE_6647, BONAIRE)
 CHIPSET(0x6649, BONAIRE_6649, BONAIRE)
 CHIPSET(0x6650, BONAIRE_6650, BONAIRE)
 CHIPSET(0x6651, BONAIRE_6651, BONAIRE)
@@ -132,6 +138,7 @@ CHIPSET(0x1313, KAVERI_1313, KAVERI)
 CHIPSET(0x1315, KAVERI_1315, KAVERI)
 CHIPSET(0x1316, KAVERI_1316, KAVERI)
 CHIPSET(0x1317, KAVERI_1317, KAVERI)
+CHIPSET(0x1318, KAVERI_1318, KAVERI)
 CHIPSET(0x131B, KAVERI_131B, KAVERI)
 CHIPSET(0x131C, KAVERI_131C, KAVERI)
 CHIPSET(0x131D, KAVERI_131D, KAVERI)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1850,7 +1850,7 @@ lp_build_trunc(struct lp_build_context *bld,
      const struct lp_type type = bld->type;
      struct lp_type inttype;
      struct lp_build_context intbld;
-      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
      LLVMValueRef trunc, res, anosign, mask;
      LLVMTypeRef int_vec_type = bld->int_vec_type;
      LLVMTypeRef vec_type = bld->vec_type;
@@ -1905,7 +1905,7 @@ lp_build_round(struct lp_build_context *bld,
      const struct lp_type type = bld->type;
      struct lp_type inttype;
      struct lp_build_context intbld;
-      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
      LLVMValueRef res, anosign, mask;
      LLVMTypeRef int_vec_type = bld->int_vec_type;
      LLVMTypeRef vec_type = bld->vec_type;
@@ -1958,7 +1958,7 @@ lp_build_floor(struct lp_build_context *bld,
      const struct lp_type type = bld->type;
      struct lp_type inttype;
      struct lp_build_context intbld;
-      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
      LLVMValueRef trunc, res, anosign, mask;
      LLVMTypeRef int_vec_type = bld->int_vec_type;
      LLVMTypeRef vec_type = bld->vec_type;
@@ -2027,7 +2027,7 @@ lp_build_ceil(struct lp_build_context *bld,
      const struct lp_type type = bld->type;
      struct lp_type inttype;
      struct lp_build_context intbld;
-      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 2^24);
+      LLVMValueRef cmpval = lp_build_const_vec(bld->gallivm, type, 1<<24);
      LLVMValueRef trunc, res, anosign, mask, tmp;
      LLVMTypeRef int_vec_type = bld->int_vec_type;
      LLVMTypeRef vec_type = bld->vec_type;
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -464,6 +464,7 @@ lp_build_pack2(struct gallivm_state *gallivm,
   if((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
       src_type.width * src_type.length >= 128) {
      const char *intrinsic = NULL;
+      boolean swap_intrinsic_operands = FALSE;

      switch(src_type.width) {
      case 32:
@@ -482,6 +483,9 @@ lp_build_pack2(struct gallivm_state *gallivm,
           } else {
              intrinsic = "llvm.ppc.altivec.vpkuwus";
           }
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+           swap_intrinsic_operands = TRUE;
+#endif
         }
         break;
      case 16:
@@ -490,12 +494,18 @@ lp_build_pack2(struct gallivm_state *gallivm,
              intrinsic = "llvm.x86.sse2.packsswb.128";
            } else if (util_cpu_caps.has_altivec) {
              intrinsic = "llvm.ppc.altivec.vpkshss";
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+              swap_intrinsic_operands = TRUE;
+#endif
            }
         } else {
            if (util_cpu_caps.has_sse2) {
              intrinsic = "llvm.x86.sse2.packuswb.128";
            } else if (util_cpu_caps.has_altivec) {
 	      intrinsic = "llvm.ppc.altivec.vpkshus";
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+              swap_intrinsic_operands = TRUE;
+#endif
            }
         }
         break;
@@ -504,7 +514,11 @@ lp_build_pack2(struct gallivm_state *gallivm,
      if (intrinsic) {
         if (src_type.width * src_type.length == 128) {
            LLVMTypeRef intr_vec_type = lp_build_vec_type(gallivm, intr_type);
-            res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, lo, hi);
+            if (swap_intrinsic_operands) {
+               res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, hi, lo);
+            } else {
+               res = lp_build_intrinsic_binary(builder, intrinsic, intr_vec_type, lo, hi);
+            }
            if (dst_vec_type != intr_vec_type) {
               res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
            }
@@ -513,6 +527,8 @@ lp_build_pack2(struct gallivm_state *gallivm,
            int num_split = src_type.width * src_type.length / 128;
            int i;
            int nlen = 128 / src_type.width;
+            int lo_off = swap_intrinsic_operands ? nlen : 0; /* swapped: first operand is taken from the upper nlen elements */
+            int hi_off = swap_intrinsic_operands ? 0 : nlen; /* swapped: second operand is taken from the lower nlen elements */
            struct lp_type ndst_type = lp_type_unorm(dst_type.width, 128);
            struct lp_type nintr_type = lp_type_unorm(intr_type.width, 128);
            LLVMValueRef tmpres[LP_MAX_VECTOR_WIDTH / 128];
@@ -524,9 +540,9 @@ lp_build_pack2(struct gallivm_state *gallivm,

            for (i = 0; i < num_split / 2; i++) {
               tmplo = lp_build_extract_range(gallivm,
-                                              lo, i*nlen*2, nlen);
+                                              lo, i*nlen*2 + lo_off, nlen);
               tmphi = lp_build_extract_range(gallivm,
-                                              lo, i*nlen*2 + nlen, nlen);
+                                              lo, i*nlen*2 + hi_off, nlen);
               tmpres[i] = lp_build_intrinsic_binary(builder, intrinsic,
                                                     nintr_vec_type, tmplo, tmphi);
               if (ndst_vec_type != nintr_vec_type) {
@@ -535,9 +551,9 @@ lp_build_pack2(struct gallivm_state *gallivm,
            }
            for (i = 0; i < num_split / 2; i++) {
               tmplo = lp_build_extract_range(gallivm,
-                                              hi, i*nlen*2, nlen);
+                                              hi, i*nlen*2 + lo_off, nlen);
               tmphi = lp_build_extract_range(gallivm,
-                                              hi, i*nlen*2 + nlen, nlen);
+                                              hi, i*nlen*2 + hi_off, nlen);
               tmpres[i+num_split/2] = lp_build_intrinsic_binary(builder, intrinsic,
                                                                 nintr_vec_type,
                                                                 tmplo, tmphi);
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -40,6 +40,7 @@


 #include "pipe/p_compiler.h"
+#include "util/u_debug.h"


 #ifdef __cplusplus
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1060,6 +1060,7 @@ vl_compositor_render(struct vl_compositor_state *s,
      s->scissor.maxx = dst_surface->width;
      s->scissor.maxy = dst_surface->height;
   }
+   c->pipe->set_scissor_states(c->pipe, 0, 1, &s->scissor);

   gen_vertex_data(c, s, dirty_area);

@@ -1072,7 +1073,6 @@ vl_compositor_render(struct vl_compositor_state *s,
      dirty_area->x1 = dirty_area->y1 = MIN_DIRTY;
   }

-   c->pipe->set_scissor_states(c->pipe, 0, 1, &s->scissor);
   c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
   c->pipe->bind_vs_state(c->pipe, c->vs);
   c->pipe->set_vertex_buffers(c->pipe, 0, 1, &c->vertex_buf);
--- a/src/gallium/drivers/freedreno/a2xx/fd2_context.c
+++ b/src/gallium/drivers/freedreno/a2xx/fd2_context.c
@@ -98,6 +98,7 @@ fd2_context_create(struct pipe_screen *pscreen, void *priv)
 	pctx = &fd2_ctx->base.base;

 	fd2_ctx->base.dev = fd_device_ref(screen->dev);
+	fd2_ctx->base.screen = fd_screen(pscreen);

 	pctx->destroy = fd2_context_destroy;
 	pctx->create_blend_state = fd2_blend_state_create;
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -215,14 +215,19 @@ emit_textures(struct fd_ringbuffer *ring,
 		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
 				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
 		for (i = 0; i < tex->num_textures; i++) {
-			static const struct fd3_pipe_sampler_view dummy_view = {};
+			static const struct fd3_pipe_sampler_view dummy_view = {
+					.base.u.tex.first_level = 1, /* > last_level (0): (end - start + 1) == 0, so the mip loop below emits nothing */
+			};
 			const struct fd3_pipe_sampler_view *view = tex->textures[i] ?
 					fd3_pipe_sampler_view(tex->textures[i]) :
 					&dummy_view;
 			struct fd_resource *rsc = view->tex_resource;
+			unsigned start = view->base.u.tex.first_level;
+			unsigned end   = view->base.u.tex.last_level;

-			for (j = 0; j < view->mipaddrs; j++) {
-				struct fd_resource_slice *slice = fd_resource_slice(rsc, j);
+			for (j = 0; j < (end - start + 1); j++) {
+				struct fd_resource_slice *slice =
+						fd_resource_slice(rsc, j + start);
 				OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
 			}

--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -144,7 +144,8 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 {
 	struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
 	struct fd_resource *rsc = fd_resource(prsc);
-	unsigned miplevels = cso->u.tex.last_level - cso->u.tex.first_level;
+	unsigned lvl = cso->u.tex.first_level;
+	unsigned miplevels = cso->u.tex.last_level - lvl;

 	if (!so)
 		return NULL;
@@ -156,7 +157,6 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	so->base.context = pctx;

 	so->tex_resource =  rsc;
-	so->mipaddrs = 1 + miplevels;

 	so->texconst0 =
 			A3XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
@@ -170,11 +170,11 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,

 	so->texconst1 =
 			A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
-			A3XX_TEX_CONST_1_WIDTH(prsc->width0) |
-			A3XX_TEX_CONST_1_HEIGHT(prsc->height0);
+			A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+			A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
 	/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
 	so->texconst2 =
-			A3XX_TEX_CONST_2_PITCH(rsc->slices[0].pitch * rsc->cpp);
+			A3XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
 	so->texconst3 = 0x00000000;  /* ??? */

 	return &so->base;
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
@@ -51,7 +51,6 @@ fd3_sampler_stateobj(struct pipe_sampler_state *samp)
 struct fd3_pipe_sampler_view {
 	struct pipe_sampler_view base;
 	struct fd_resource *tex_resource;
-	uint32_t mipaddrs;
 	uint32_t texconst0, texconst1, texconst2, texconst3;
 };

--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -304,7 +304,36 @@ fail:
 	return NULL;
 }

-static bool render_blit(struct pipe_context *pctx, struct pipe_blit_info *info);
+static void fd_blitter_pipe_begin(struct fd_context *ctx);
+static void fd_blitter_pipe_end(struct fd_context *ctx);
+
+/**
+ * _copy_region using pipe (3d engine); returns true if the copy was issued, false if nothing was done.
+ */
+static bool
+fd_blitter_pipe_copy_region(struct fd_context *ctx,
+		struct pipe_resource *dst,
+		unsigned dst_level,
+		unsigned dstx, unsigned dsty, unsigned dstz,
+		struct pipe_resource *src,
+		unsigned src_level,
+		const struct pipe_box *src_box)
+{
+	/* not until we allow rendertargets to be buffers (caller must fall back) */
+	if (dst->target == PIPE_BUFFER || src->target == PIPE_BUFFER)
+		return false;
+
+	if (!util_blitter_is_copy_supported(ctx->blitter, dst, src))
+		return false;
+
+	fd_blitter_pipe_begin(ctx); /* save state for u_blitter, enter FD_STAGE_BLIT */
+	util_blitter_copy_texture(ctx->blitter,
+			dst, dst_level, dstx, dsty, dstz,
+			src, src_level, src_box);
+	fd_blitter_pipe_end(ctx); /* back to FD_STAGE_NULL */
+
+	return true;
+}

 /**
 * Copy a block of pixels from one resource to another.
@@ -320,40 +349,33 @@ fd_resource_copy_region(struct pipe_context *pctx,
 		unsigned src_level,
 		const struct pipe_box *src_box)
 {
+	struct fd_context *ctx = fd_context(pctx);
+
 	/* TODO if we have 2d core, or other DMA engine that could be used
 	 * for simple copies and reasonably easily synchronized with the 3d
 	 * core, this is where we'd plug it in..
 	 */
-	struct pipe_blit_info info = {
-		.dst = {
-			.resource = dst,
-			.box = {
-				.x      = dstx,
-				.y      = dsty,
-				.z      = dstz,
-				.width  = src_box->width,
-				.height = src_box->height,
-				.depth  = src_box->depth,
-			},
-			.format = util_format_linear(dst->format),
-		},
-		.src = {
-			.resource = src,
-			.box      = *src_box,
-			.format   = util_format_linear(src->format),
-		},
-		.mask = PIPE_MASK_RGBA,
-		.filter = PIPE_TEX_FILTER_NEAREST,
-	};
-	render_blit(pctx, &info);
+
+	/* try blit on 3d pipe: */
+	if (fd_blitter_pipe_copy_region(ctx,
+			dst, dst_level, dstx, dsty, dstz,
+			src, src_level, src_box))
+		return;
+
+	/* else fallback to pure sw: */
+	util_resource_copy_region(pctx,
+			dst, dst_level, dstx, dsty, dstz,
+			src, src_level, src_box);
 }

-/* Optimal hardware path for blitting pixels.
+/**
+ * Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 */
 static void
 fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
 {
+	struct fd_context *ctx = fd_context(pctx);
 	struct pipe_blit_info info = *blit_info;

 	if (info.src.resource->nr_samples > 1 &&
@@ -373,21 +395,21 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
 		info.mask &= ~PIPE_MASK_S;
 	}

-	render_blit(pctx, &info);
-}
-
-static bool
-render_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
-{
-	struct fd_context *ctx = fd_context(pctx);
-
-	if (!util_blitter_is_blit_supported(ctx->blitter, info)) {
+	if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {
 		DBG("blit unsupported %s -> %s",
-				util_format_short_name(info->src.resource->format),
-				util_format_short_name(info->dst.resource->format));
-		return false;
+				util_format_short_name(info.src.resource->format),
+				util_format_short_name(info.dst.resource->format));
+		return;
 	}

+	fd_blitter_pipe_begin(ctx);
+	util_blitter_blit(ctx->blitter, &info);
+	fd_blitter_pipe_end(ctx);
+}
+
+static void
+fd_blitter_pipe_begin(struct fd_context *ctx)
+{
 	util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertexbuf.vb);
 	util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx);
 	util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
@@ -407,15 +429,21 @@ render_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
 			ctx->fragtex.num_textures, ctx->fragtex.textures);

 	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
-	util_blitter_blit(ctx->blitter, info);
-	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
-
-	return true;
 }

 static void
-fd_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+fd_blitter_pipe_end(struct fd_context *ctx)
 {
+	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
+}
+
+static void
+fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
+{
+	struct fd_resource *rsc = fd_resource(prsc);
+
+	if (rsc->dirty)
+		fd_context_render(pctx);
 }

 void
--- a/src/gallium/drivers/freedreno/ir3/ir3_dump.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_dump.c
@@ -322,7 +322,8 @@ static void ir3_block_dump(struct ir3_dump_ctx *ctx,

 	/* draw instruction graph: */
 	for (i = 0; i < block->noutputs; i++)
-		dump_instr(ctx, block->outputs[i]);
+		if (block->outputs[i])
+			dump_instr(ctx, block->outputs[i]);

 	/* draw outputs: */
 	fprintf(ctx->f, "output%lx [shape=record,label=\"outputs", PTRID(block));
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -174,15 +174,29 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
 }

 void
-NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
-                                  const Instruction *insn,
-                                  std::list<TexUse> &uses)
+NVC0LegalizePostRA::findFirstUses(
+      const Instruction *texi,
+      const Instruction *insn,
+      std::list<TexUse> &uses,
+      std::tr1::unordered_set<const Instruction *>& visited)
 {
   for (int d = 0; insn->defExists(d); ++d) {
      Value *v = insn->getDef(d);
      for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
         Instruction *usei = (*u)->getInsn();

+         // NOTE: In case of a loop that overwrites a value but never uses
+         // it, it can happen that we have a cycle of uses that consists only
+         // of phis and no-op moves and will thus cause an infinite loop here
+         // since these are not considered actual uses.
+         // The most obvious (and perhaps the only) way to prevent this is to
+         // remember which instructions we've already visited.
+
+         if (visited.find(usei) != visited.end())
+            continue;
+
+         visited.insert(usei);
+
         if (usei->op == OP_PHI || usei->op == OP_UNION) {
            // need a barrier before WAW cases
            for (int s = 0; usei->srcExists(s); ++s) {
@@ -197,11 +211,11 @@ NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
             usei->op == OP_PHI ||
             usei->op == OP_UNION) {
            // these uses don't manifest in the machine code
-            findFirstUses(texi, usei, uses);
+            findFirstUses(texi, usei, uses, visited);
         } else
         if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
             usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
-            findFirstUses(texi, usei, uses);
+            findFirstUses(texi, usei, uses, visited);
         } else {
            addTexUse(uses, usei, insn);
         }
@@ -257,8 +271,10 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
   uses = new std::list<TexUse>[texes.size()];
   if (!uses)
      return false;
-   for (size_t i = 0; i < texes.size(); ++i)
-      findFirstUses(texes[i], texes[i], uses[i]);
+   for (size_t i = 0; i < texes.size(); ++i) {
+      std::tr1::unordered_set<const Instruction *> visited;
+      findFirstUses(texes[i], texes[i], uses[i], visited);
+   }

   // determine the barrier level at each use
   for (size_t i = 0; i < texes.size(); ++i) {
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -20,6 +20,8 @@
 * OTHER DEALINGS IN THE SOFTWARE.
 */

+#include <tr1/unordered_set>
+
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_build_util.h"

@@ -69,7 +71,8 @@ private:
   bool insertTextureBarriers(Function *);
   inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
   void findFirstUses(const Instruction *tex, const Instruction *def,
-                      std::list<TexUse>&);
+                      std::list<TexUse>&,
+                      std::tr1::unordered_set<const Instruction *>&);
   void findOverwritingDefs(const Instruction *tex, Instruction *insn,
                            const BasicBlock *term,
                            std::list<TexUse>&);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -567,6 +567,10 @@ ConstantFolding::expr(Instruction *i,
      ImmediateValue src0;
      if (i->src(0).getImmediate(src0))
         expr(i, src0, *i->getSrc(1)->asImm());
+      if (i->saturate && !prog->getTarget()->isSatSupported(i)) {
+         bld.setPosition(i, false);
+         i->setSrc(1, bld.loadImm(NULL, res.data.u32));
+      }
   } else {
      i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
   }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1657,6 +1657,10 @@ RegAlloc::execFunc()
           ret && i <= func->loopNestingBound;
           sequence = func->cfg.nextSequence(), ++i)
         ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot()));
+      // clear the liveSet marker on every basic block (done even if buildLiveSets failed)
+      for (ArrayList::Iterator bi = func->allBBlocks.iterator();
+           !bi.end(); bi.next())
+         BasicBlock::get(bi)->liveSet.marker = false;
      if (!ret)
         break;
      func->orderInstructions(this->insns);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -449,7 +449,7 @@ TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const
         return false;
      }
   }
-   if (s > 3)
+   if (s >= 3)
      return false;
   return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
 }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -423,7 +423,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
         return false;
      }
   }
-   if (s > 3)
+   if (s >= 3)
      return false;
   return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
 }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
@@ -254,7 +254,9 @@ bool BitSet::resize(unsigned int nBits)
      return false;
   }
   if (n > p)
-      memset(&data[4 * p + 4], 0, (n - p) * 4);
+      memset(&data[p], 0, (n - p) * 4);
+   if (nBits < size && (nBits % 32)) // shrinking: drop stale bits past nBits in the last word
+      data[(nBits + 31) / 32 - 1] &= (1u << (nBits % 32)) - 1; // 1u: no signed overflow when nBits % 32 == 31

   size = nBits;
   return true;
@@ -274,8 +276,8 @@ bool BitSet::allocate(unsigned int nBits, bool zero)
   if (zero)
      memset(data, 0, (size + 7) / 8);
   else
-   if (nBits)
-      data[(size + 31) / 32 - 1] = 0; // clear unused bits (e.g. for popCount)
+   if (size % 32) // clear unused bits (e.g. for popCount); 1u avoids signed overflow at size % 32 == 31
+      data[(size + 31) / 32 - 1] &= (1u << (size % 32)) - 1;

   return data;
 }
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
@@ -484,6 +484,7 @@ public:
         FREE(data);
   }

+   // allocate will keep old data iff size is unchanged
   bool allocate(unsigned int nBits, bool zero);
   bool resize(unsigned int nBits); // keep old data, zero additional bits

--- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
@@ -39,6 +39,8 @@ struct nouveau_vp3_video_buffer {
 #define VP_OFFSET 0x200
 #define COMM_OFFSET 0x500

+#define NOUVEAU_VP3_BSP_RESERVED_SIZE 0x700
+
 #define NOUVEAU_VP3_DEBUG_FENCE 0

 #if NOUVEAU_VP3_DEBUG_FENCE
--- a/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c
+++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_vp.c
@@ -78,10 +78,10 @@ struct mpeg4_picparm_vp {
 	uint8_t top_field_first; // bool, written to vuc

 	uint8_t pad4[3]; // 59, 5a, 5b, contains garbage on blob
-	uint32_t pad5[0x10]; // 5c...9c non-inclusive, but WHY?

-	uint32_t intra[0x10]; // 9c
-	uint32_t non_intra[0x10]; // bc
+	uint32_t intra[0x10]; // 5c
+	uint32_t non_intra[0x10]; // 9c
+	uint32_t pad5[0x10]; // dc (0x9c + 0x40, TODO confirm) what does this do?
 	// udc..uff pad?
 };

@@ -196,11 +196,15 @@ nouveau_vp3_handle_references(struct nouveau_vp3_decoder *dec, struct nouveau_vp
   /* Try to find a real empty spot first, there should be one..
    */
   for (i = 0; i < dec->base.max_references + 1; ++i) {
-      if (dec->refs[i].last_used != seq) {
+      if (dec->refs[i].vidbuf == target) { /* target already has a slot: reuse it and stop searching */
         empty_spot = i;
         break;
-      }
+      } else if (!dec->refs[i].last_used) { /* slot with last_used == 0; overrides earlier picks, keep scanning for target */
+         empty_spot = i;
+      } else if (empty_spot == ~0U && dec->refs[i].last_used != seq) /* fallback, only if nothing picked yet: slot not used by seq */
+         empty_spot = i;
   }
+
   assert(empty_spot < dec->base.max_references+1);
   dec->refs[empty_spot].last_used = seq;
 //   debug_printf("Kicked %p to add %p to slot %i\n", dec->refs[empty_spot].vidbuf, target, empty_spot);
@@ -267,7 +271,6 @@ nouveau_vp3_fill_picparm_mpeg4_vp(struct nouveau_vp3_decoder *dec,
 {
   struct mpeg4_picparm_vp pic_vp_stub = {}, *pic_vp = &pic_vp_stub;
   uint32_t ring, ret = 0x01014; // !async_shutdown << 16 | watchdog << 12 | irq_record << 4 | unk;
-   assert(!(dec->base.width & 0xf));
   *is_ref = desc->vop_coding_type <= 1;

   pic_vp->width = dec->base.width;
@@ -463,14 +466,45 @@ void nouveau_vp3_vp_caps(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   case PIPE_VIDEO_FORMAT_MPEG12:
      *caps = nouveau_vp3_fill_picparm_mpeg12_vp(dec, desc.mpeg12, refs, is_ref, vp);
      nouveau_vp3_handle_references(dec, refs, dec->fence_seq, target);
+      switch (desc.mpeg12->picture_structure) {
+      case PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_TOP:
+         dec->refs[target->valid_ref].decoded_top = 1;
+         break;
+      case PIPE_MPEG12_PICTURE_STRUCTURE_FIELD_BOTTOM:
+         dec->refs[target->valid_ref].decoded_bottom = 1;
+         break;
+      default:
+         dec->refs[target->valid_ref].decoded_top = 1;
+         dec->refs[target->valid_ref].decoded_bottom = 1;
+         break;
+      }
      return;
   case PIPE_VIDEO_FORMAT_MPEG4:
      *caps = nouveau_vp3_fill_picparm_mpeg4_vp(dec, desc.mpeg4, refs, is_ref, vp);
      nouveau_vp3_handle_references(dec, refs, dec->fence_seq, target);
+      // XXX: Correct?
+      if (!desc.mpeg4->interlaced) {
+         dec->refs[target->valid_ref].decoded_top = 1;
+         dec->refs[target->valid_ref].decoded_bottom = 1;
+      } else if (desc.mpeg4->top_field_first) {
+         if (!dec->refs[target->valid_ref].decoded_top)
+            dec->refs[target->valid_ref].decoded_top = 1;
+         else
+            dec->refs[target->valid_ref].decoded_bottom = 1;
+      } else {
+         if (!dec->refs[target->valid_ref].decoded_bottom)
+            dec->refs[target->valid_ref].decoded_bottom = 1;
+         else
+            dec->refs[target->valid_ref].decoded_top = 1;
+      }
      return;
   case PIPE_VIDEO_FORMAT_VC1: {
      *caps = nouveau_vp3_fill_picparm_vc1_vp(dec, desc.vc1, refs, is_ref, vp);
      nouveau_vp3_handle_references(dec, refs, dec->fence_seq, target);
+      if (desc.vc1->frame_coding_mode == 3)
+         debug_printf("Field-Interlaced possibly incorrectly handled\n");
+      dec->refs[target->valid_ref].decoded_top = 1;
+      dec->refs[target->valid_ref].decoded_bottom = 1;
      return;
   }
   case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -585,9 +585,12 @@ nv50_stage_sampler_states_bind(struct nv50_context *nv50, int s,
         nv50_screen_tsc_unlock(nv50->screen, old);
   }
   assert(nv50->num_samplers[s] <= PIPE_MAX_SAMPLERS);
-   for (; i < nv50->num_samplers[s]; ++i)
-      if (nv50->samplers[s][i])
+   for (; i < nv50->num_samplers[s]; ++i) {
+      if (nv50->samplers[s][i]) {
         nv50_screen_tsc_unlock(nv50->screen, nv50->samplers[s][i]);
+         nv50->samplers[s][i] = NULL;
+      }
+   }

   nv50->num_samplers[s] = nr;

--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -54,8 +54,8 @@ nv50_validate_fb(struct nv50_context *nv50)
      assert(mt->layout_3d || !array_mode || array_size == 1);

      BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(i)), 5);
-      PUSH_DATAh(push, bo->offset + sf->offset);
-      PUSH_DATA (push, bo->offset + sf->offset);
+      PUSH_DATAh(push, mt->base.address + sf->offset);
+      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nv50_format_table[sf->base.format].rt);
      if (likely(nouveau_bo_memtype(bo))) {
         PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
@@ -97,8 +97,8 @@ nv50_validate_fb(struct nv50_context *nv50)
      int unk = mt->base.base.target == PIPE_TEXTURE_3D || sf->depth == 1;

      BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
-      PUSH_DATAh(push, bo->offset + sf->offset);
-      PUSH_DATA (push, bo->offset + sf->offset);
+      PUSH_DATAh(push, mt->base.address + sf->offset);
+      PUSH_DATA (push, mt->base.address + sf->offset);
      PUSH_DATA (push, nv50_format_table[fb->zsbuf->format].rt);
      PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
      PUSH_DATA (push, mt->layer_stride >> 2);
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -114,8 +114,8 @@ nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
      PUSH_DATA (push, mt->level[level].pitch);
      PUSH_DATA (push, width);
      PUSH_DATA (push, height);
-      PUSH_DATAh(push, bo->offset + offset);
-      PUSH_DATA (push, bo->offset + offset);
+      PUSH_DATAh(push, mt->base.address + offset);
+      PUSH_DATA (push, mt->base.address + offset);
   } else {
      BEGIN_NV04(push, SUBC_2D(mthd), 5);
      PUSH_DATA (push, format);
@@ -126,8 +126,8 @@ nv50_2d_texture_set(struct nouveau_pushbuf *push, int dst,
      BEGIN_NV04(push, SUBC_2D(mthd + 0x18), 4);
      PUSH_DATA (push, width);
      PUSH_DATA (push, height);
-      PUSH_DATAh(push, bo->offset + offset);
-      PUSH_DATA (push, bo->offset + offset);
+      PUSH_DATAh(push, mt->base.address + offset);
+      PUSH_DATA (push, mt->base.address + offset);
   }

 #if 0
@@ -299,8 +299,8 @@ nv50_clear_render_target(struct pipe_context *pipe,
   BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
   PUSH_DATA (push, 1);
   BEGIN_NV04(push, NV50_3D(RT_ADDRESS_HIGH(0)), 5);
-   PUSH_DATAh(push, bo->offset + sf->offset);
-   PUSH_DATA (push, bo->offset + sf->offset);
+   PUSH_DATAh(push, mt->base.address + sf->offset);
+   PUSH_DATA (push, mt->base.address + sf->offset);
   PUSH_DATA (push, nv50_format_table[dst->format].rt);
   PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
   PUSH_DATA (push, mt->layer_stride >> 2);
@@ -381,8 +381,8 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
   nv50->scissors_dirty |= 1;

   BEGIN_NV04(push, NV50_3D(ZETA_ADDRESS_HIGH), 5);
-   PUSH_DATAh(push, bo->offset + sf->offset);
-   PUSH_DATA (push, bo->offset + sf->offset);
+   PUSH_DATAh(push, mt->base.address + sf->offset);
+   PUSH_DATA (push, mt->base.address + sf->offset);
   PUSH_DATA (push, nv50_format_table[dst->format].rt);
   PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
   PUSH_DATA (push, mt->layer_stride >> 2);
--- a/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_transfer.c
@@ -24,6 +24,8 @@ nv50_m2mf_rect_setup(struct nv50_m2mf_rect *rect,
   rect->bo = mt->base.bo;
   rect->domain = mt->base.domain;
   rect->base = mt->level[l].offset;
+   if (mt->base.bo->offset != mt->base.address) /* miptree does not start at the beginning of its bo */
+      rect->base += mt->base.address - mt->base.bo->offset; /* rect->base is relative to rect->bo */
   rect->pitch = mt->level[l].pitch;
   if (util_format_is_plain(res->format)) {
      rect->width = w << mt->ms_x;
--- a/src/gallium/drivers/nouveau/nv50/nv84_video.c
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.c
@@ -482,12 +482,14 @@ nv84_create_decoder(struct pipe_context *context,
      mip.level[0].pitch = surf.width * 4;
      mip.base.domain = NOUVEAU_BO_VRAM;
      mip.base.bo = dec->mbring;
+      mip.base.address = dec->mbring->offset;
      context->clear_render_target(context, &surf.base, &color, 0, 0, 64, 4760);
      surf.offset = dec->vpring->size / 2 - 0x1000;
      surf.width = 1024;
      surf.height = 1;
      mip.level[0].pitch = surf.width * 4;
      mip.base.bo = dec->vpring;
+      mip.base.address = dec->vpring->offset;
      context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1);
      surf.offset = dec->vpring->size - 0x1000;
      context->clear_render_target(context, &surf.base, &color, 0, 0, 1024, 1);
@@ -683,17 +685,14 @@ nv84_video_buffer_create(struct pipe_context *pipe,
                      bo_size, &cfg, &buffer->full))
      goto error;

-   mt0->base.bo = buffer->interlaced;
+   nouveau_bo_ref(buffer->interlaced, &mt0->base.bo);
   mt0->base.domain = NOUVEAU_BO_VRAM;
-   mt0->base.offset = 0;
-   mt0->base.address = buffer->interlaced->offset + mt0->base.offset;
-   nouveau_bo_ref(buffer->interlaced, &empty);
+   mt0->base.address = buffer->interlaced->offset;

-   mt1->base.bo = buffer->interlaced;
+   nouveau_bo_ref(buffer->interlaced, &mt1->base.bo);
   mt1->base.domain = NOUVEAU_BO_VRAM;
-   mt1->base.offset = mt0->layer_stride * 2;
-   mt1->base.address = buffer->interlaced->offset + mt1->base.offset;
-   nouveau_bo_ref(buffer->interlaced, &empty);
+   mt1->base.offset = mt0->total_size;
+   mt1->base.address = buffer->interlaced->offset + mt0->total_size;

   memset(&sv_templ, 0, sizeof(sv_templ));
   for (component = 0, i = 0; i < 2; ++i ) {
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_bsp.c
@@ -42,8 +42,8 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   struct nouveau_pushbuf *push = dec->pushbuf[0];
   enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
   uint32_t bsp_addr, comm_addr, inter_addr;
-   uint32_t slice_size, bucket_size, ring_size;
-   uint32_t caps;
+   uint32_t slice_size, bucket_size, ring_size, bsp_size;
+   uint32_t caps, i;
   int ret;
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
@@ -65,6 +65,41 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   fence_extra = 4;
 #endif

+   bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE; /* required size: reserved area + all input buffers + end markers */
+   for (i = 0; i < num_buffers; i++)
+      bsp_size += num_bytes[i];
+   bsp_size += 256; /* the 4 end markers */
+
+   if (!bsp_bo || bsp_size > bsp_bo->size) { /* (re)allocate when missing or too small for this submission */
+      struct nouveau_bo *tmp_bo = NULL;
+
+      /* round up to the nearest mb */
+      bsp_size += (1 << 20) - 1;
+      bsp_size &= ~((1 << 20) - 1);
+
+      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, NULL, &tmp_bo);
+      if (ret) {
+         debug_printf("reallocating bsp %u -> %u failed with %i\n",
+                      bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
+         return -1;
+      }
+      nouveau_bo_ref(NULL, &bsp_bo); /* presumably releases the old bo reference - confirm against libdrm */
+      bo_refs[0].bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;
+   }
+
+   if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) { /* keep inter_bo at least 4x the size of bsp_bo */
+      struct nouveau_bo *tmp_bo = NULL;
+
+      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, NULL, &tmp_bo);
+      if (ret) {
+         debug_printf("reallocating inter %u -> %u failed with %i\n",
+                      inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);
+         return -1;
+      }
+      nouveau_bo_ref(NULL, &inter_bo); /* presumably releases the old bo reference - confirm against libdrm */
+      bo_refs[1].bo = dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;
+   }
+
   ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
   if (ret) {
      debug_printf("map failed: %i %s\n", ret, strerror(-ret));
--- a/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video_vp.c
@@ -59,7 +59,6 @@ static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32
 static void
 nv98_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
 {
-   dec->refs[target->valid_ref].vidbuf = NULL;
   dec->refs[target->valid_ref].last_used = 0;
 //   debug_printf("Unreffed %p\n", target);
 }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -261,7 +261,6 @@ nvc0_miptree_create(struct pipe_screen *pscreen,

   if (pt->usage == PIPE_USAGE_STAGING) {
      switch (pt->target) {
-      case PIPE_TEXTURE_1D:
      case PIPE_TEXTURE_2D:
      case PIPE_TEXTURE_RECT:
         if (pt->last_level == 0 &&
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
@@ -173,16 +173,12 @@ nvc0_create_decoder(struct pipe_context *context,
      ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
                           0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
   if (!ret) {
-      if (!kepler)
-         nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
-      else
-         ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
-                              0x100, dec->inter_bo[0]->size, &cfg,
-                              &dec->inter_bo[1]);
+      ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+                           0x100, dec->inter_bo[0]->size, &cfg,
+                           &dec->inter_bo[1]);
   }
   if (ret)
      goto fail;
-
   switch (u_reduce_video_profile(templ->profile)) {
   case PIPE_VIDEO_FORMAT_MPEG12: {
      codec = 1;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
@@ -42,8 +42,8 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   struct nouveau_pushbuf *push = dec->pushbuf[0];
   enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
   uint32_t bsp_addr, comm_addr, inter_addr;
-   uint32_t slice_size, bucket_size, ring_size;
-   uint32_t caps;
+   uint32_t slice_size, bucket_size, ring_size, bsp_size;
+   uint32_t caps, i;
   int ret;
   struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
   struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
@@ -65,6 +65,49 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
   fence_extra = 4;
 #endif

+   bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
+   for (i = 0; i < num_buffers; i++)
+      bsp_size += num_bytes[i];
+   bsp_size += 256; /* the 4 end markers */
+
+   if (!bsp_bo || bsp_size > bsp_bo->size) {
+      union nouveau_bo_config cfg;
+      struct nouveau_bo *tmp_bo = NULL;
+
+      cfg.nvc0.tile_mode = 0x10;
+      cfg.nvc0.memtype = 0xfe;
+
+      /* round up to the nearest mb */
+      bsp_size += (1 << 20) - 1;
+      bsp_size &= ~((1 << 20) - 1);
+
+      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);
+      if (ret) {
+         debug_printf("reallocating bsp %u -> %u failed with %i\n",
+                      bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
+         return -1;
+      }
+      nouveau_bo_ref(NULL, &bsp_bo);
+      bo_refs[0].bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;
+   }
+
+   if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) {
+      union nouveau_bo_config cfg;
+      struct nouveau_bo *tmp_bo = NULL;
+
+      cfg.nvc0.tile_mode = 0x10;
+      cfg.nvc0.memtype = 0xfe;
+
+      ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_bo->size * 4, &cfg, &tmp_bo);
+      if (ret) {
+         debug_printf("reallocating inter %u -> %u failed with %i\n",
+                      inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);
+         return -1;
+      }
+      nouveau_bo_ref(NULL, &inter_bo);
+      bo_refs[1].bo = dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;
+   }
+
   ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
   if (ret) {
      debug_printf("map failed: %i %s\n", ret, strerror(-ret));
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
@@ -59,7 +59,6 @@ static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32
 static void
 nvc0_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
 {
-   dec->refs[target->valid_ref].vidbuf = NULL;
   dec->refs[target->valid_ref].last_used = 0;
 //   debug_printf("Unreffed %p\n", target);
 }
--- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c
@@ -572,14 +572,16 @@ static void do_advanced_regalloc(struct regalloc_state * s)
 	graph = ra_alloc_interference_graph(ra_state->regs,
 						node_count + s->NumInputs);

+	for (node_index = 0; node_index < node_count; node_index++) {
+		ra_set_node_class(graph, node_index, node_classes[node_index]);
+	}
+
 	/* Build the interference graph */
 	for (var_ptr = variables, node_index = 0; var_ptr;
 					var_ptr = var_ptr->Next,node_index++) {
 		struct rc_list * a, * b;
 		unsigned int b_index;

-		ra_set_node_class(graph, node_index, node_classes[node_index]);
-
 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
 						b; b = b->Next, b_index++) {
 			struct rc_variable * var_a = a->Item;
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -440,7 +440,8 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
 	}

 	r600_blitter_begin(ctx, R600_CLEAR);
-	util_blitter_clear(rctx->blitter, fb->width, fb->height, 1,
+	util_blitter_clear(rctx->blitter, fb->width, fb->height,
+			   util_framebuffer_get_num_layers(fb),
 			   buffers, color, depth, stencil);
 	r600_blitter_end(ctx);

--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1245,12 +1245,6 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 			}
 		}

-		if (rctx->b.chip_class >= EVERGREEN) {
-			evergreen_update_db_shader_control(rctx);
-		} else {
-			r600_update_db_shader_control(rctx);
-		}
-
 		if (unlikely(!ps_dirty && rctx->ps_shader && rctx->rasterizer &&
 				((rctx->rasterizer->sprite_coord_enable != rctx->ps_shader->current->sprite_coord_enable) ||
 						(rctx->rasterizer->flatshade != rctx->ps_shader->current->flatshade)))) {
@@ -1264,6 +1258,12 @@ static bool r600_update_derived_state(struct r600_context *rctx)
 		update_shader_atom(ctx, &rctx->pixel_shader, rctx->ps_shader->current);
 	}

+	if (rctx->b.chip_class >= EVERGREEN) {
+		evergreen_update_db_shader_control(rctx);
+	} else {
+		r600_update_db_shader_control(rctx);
+	}
+
 	/* on R600 we stuff masks + txq info into one constant buffer */
 	/* on evergreen we only need a txq info one */
 	if (rctx->b.chip_class < EVERGREEN) {
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -807,12 +807,40 @@ void r600_suspend_nontimer_queries(struct r600_common_context *ctx)
 	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);
 }

+/* Estimate the CS dwords needed to resume every active non-timer query. */
+static unsigned r600_queries_num_cs_dw_for_resuming(struct r600_common_context *ctx)
+{
+	struct r600_query *q;
+	/* Fixed part: the streamout enable atom (primitives generated query)
+	 * plus a guess of 13 dwords for ZPASS enable or PERFECT_ZPASS_COUNT
+	 * enable updates.
+	 */
+	unsigned total = ctx->streamout.enable_atom.num_dw + 13;
+
+	LIST_FOR_EACH_ENTRY(q, &ctx->active_nontimer_queries, list) {
+		/* One share each for the begin and the end of the query. */
+		total += q->num_cs_dw * 2;
+		/* A third share is a workaround for the fact that
+		 * num_cs_dw_nontimer_queries_suspend is incremented for every
+		 * resumed query, which raises the bar in need_cs_space for
+		 * queries about to be resumed.
+		 */
+		total += q->num_cs_dw;
+	}
+
+	return total;
+}
+
 void r600_resume_nontimer_queries(struct r600_common_context *ctx)
 {
 	struct r600_query *query;

 	assert(ctx->num_cs_dw_nontimer_queries_suspend == 0);

+	/* Check CS space here. Resuming must not be interrupted by flushes. */
+	ctx->need_gfx_cs_space(&ctx->b,
+			       r600_queries_num_cs_dw_for_resuming(ctx), TRUE);
+
 	LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) {
 		r600_emit_query_begin(ctx, query);
 	}
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -251,8 +251,11 @@ int rvid_get_video_param(struct pipe_screen *screen,
 			       profile != PIPE_VIDEO_PROFILE_VC1_MAIN;
 		case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
 		case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-			/* and MPEG2 only with shaders */
-			return codec != PIPE_VIDEO_FORMAT_MPEG12;
+			/* MPEG2 only with shaders and no support for
+			   interlacing on R6xx style UVD */
+			return codec != PIPE_VIDEO_FORMAT_MPEG12 &&
+			       /* TODO: RV770 might actually work */
+			       rscreen->family > CHIP_RV770;
 		default:
 			break;
 		}
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1328,6 +1328,7 @@ dri_kms_init_screen(__DRIscreen * sPriv)
   const __DRIconfig **configs;
   struct dri_screen *screen;
   struct pipe_screen *pscreen = NULL;
+   uint64_t cap;

   screen = CALLOC_STRUCT(dri_screen);
   if (!screen)
@@ -1339,6 +1340,13 @@ dri_kms_init_screen(__DRIscreen * sPriv)
   sPriv->driverPrivate = (void *)screen;

   pscreen = kms_swrast_create_screen(screen->fd);
+
+   if (drmGetCap(sPriv->fd, DRM_CAP_PRIME, &cap) == 0 &&
+          (cap & DRM_PRIME_CAP_IMPORT)) {
+      dri2ImageExtension.createImageFromFds = dri2_from_fds;
+      dri2ImageExtension.createImageFromDmaBufs = dri2_from_dma_bufs;
+   }
+
   sPriv->extensions = dri_screen_extensions;

   /* dri_init_screen_helper checks pscreen for us */
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -42,6 +42,8 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
                          VdpGetProcAddress **get_proc_address)
 {
   struct pipe_screen *pscreen;
+   struct pipe_resource *res, res_tmpl;
+   struct pipe_sampler_view sv_tmpl;
   vlVdpDevice *dev = NULL;
   VdpStatus ret;

@@ -79,6 +81,43 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
      goto no_context;
   }

+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
+   res_tmpl.width0 = 1;
+   res_tmpl.height0 = 1;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+   res_tmpl.usage = PIPE_USAGE_DEFAULT;
+
+   if (!CheckSurfaceParams(pscreen, &res_tmpl)) {
+      ret = VDP_STATUS_NO_IMPLEMENTATION;
+      goto no_resource;
+   }
+
+   res = pscreen->resource_create(pscreen, &res_tmpl);
+   if (!res) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_resource;
+   }
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+
+   sv_tmpl.swizzle_r = PIPE_SWIZZLE_ONE;
+   sv_tmpl.swizzle_g = PIPE_SWIZZLE_ONE;
+   sv_tmpl.swizzle_b = PIPE_SWIZZLE_ONE;
+   sv_tmpl.swizzle_a = PIPE_SWIZZLE_ONE;
+
+   dev->dummy_sv = dev->context->create_sampler_view(dev->context, res, &sv_tmpl);
+   pipe_resource_reference(&res, NULL);
+   if (!dev->dummy_sv) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_resource;
+   }
+
   *device = vlAddDataHTAB(dev);
   if (*device == 0) {
      ret = VDP_STATUS_ERROR;
@@ -93,8 +132,9 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
   return VDP_STATUS_OK;

 no_handle:
+   pipe_sampler_view_reference(&dev->dummy_sv, NULL);
+no_resource:
   dev->context->destroy(dev->context);
-   /* Destroy vscreen */
 no_context:
   vl_screen_destroy(dev->vscreen);
 no_vscreen:
@@ -185,6 +225,7 @@ vlVdpDeviceFree(vlVdpDevice *dev)
 {
   pipe_mutex_destroy(dev->mutex);
   vl_compositor_cleanup(&dev->compositor);
+   pipe_sampler_view_reference(&dev->dummy_sv, NULL);
   dev->context->destroy(dev->context);
   vl_screen_destroy(dev->vscreen);
   FREE(dev);
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -624,9 +624,9 @@ vlVdpOutputSurfaceRenderOutputSurface(VdpOutputSurface destination_surface,
                                      uint32_t flags)
 {
   vlVdpOutputSurface *dst_vlsurface;
-   vlVdpOutputSurface *src_vlsurface;

   struct pipe_context *context;
+   struct pipe_sampler_view *src_sv;
   struct vl_compositor *compositor;
   struct vl_compositor_state *cstate;

@@ -639,12 +639,19 @@ vlVdpOutputSurfaceRenderOutputSurface(VdpOutputSurface destination_surface,
   if (!dst_vlsurface)
      return VDP_STATUS_INVALID_HANDLE;

-   src_vlsurface = vlGetDataHTAB(source_surface);
-   if (!src_vlsurface)
-      return VDP_STATUS_INVALID_HANDLE;
+   if (source_surface == VDP_INVALID_HANDLE) {
+      src_sv = dst_vlsurface->device->dummy_sv;

-   if (dst_vlsurface->device != src_vlsurface->device)
-      return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+   } else {
+      vlVdpOutputSurface *src_vlsurface = vlGetDataHTAB(source_surface);
+      if (!src_vlsurface)
+         return VDP_STATUS_INVALID_HANDLE;
+
+      if (dst_vlsurface->device != src_vlsurface->device)
+         return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+
+      src_sv = src_vlsurface->sampler_view;
+   }

   pipe_mutex_lock(dst_vlsurface->device->mutex);
   vlVdpResolveDelayedRendering(dst_vlsurface->device, NULL, NULL);
@@ -657,7 +664,7 @@ vlVdpOutputSurfaceRenderOutputSurface(VdpOutputSurface destination_surface,

   vl_compositor_clear_layers(cstate);
   vl_compositor_set_layer_blend(cstate, 0, blend, false);
-   vl_compositor_set_rgba_layer(cstate, compositor, 0, src_vlsurface->sampler_view,
+   vl_compositor_set_rgba_layer(cstate, compositor, 0, src_sv,
                                RectToPipe(source_rect, &src_rect), NULL,
                                ColorsToPipe(colors, flags, vlcolors));
   STATIC_ASSERT(VL_COMPOSITOR_ROTATE_0 == VDP_OUTPUT_SURFACE_RENDER_ROTATE_0);
@@ -688,9 +695,9 @@ vlVdpOutputSurfaceRenderBitmapSurface(VdpOutputSurface destination_surface,
                                      uint32_t flags)
 {
   vlVdpOutputSurface *dst_vlsurface;
-   vlVdpBitmapSurface *src_vlsurface;

   struct pipe_context *context;
+   struct pipe_sampler_view *src_sv;
   struct vl_compositor *compositor;
   struct vl_compositor_state *cstate;

@@ -703,12 +710,19 @@ vlVdpOutputSurfaceRenderBitmapSurface(VdpOutputSurface destination_surface,
   if (!dst_vlsurface)
      return VDP_STATUS_INVALID_HANDLE;

-   src_vlsurface = vlGetDataHTAB(source_surface);
-   if (!src_vlsurface)
-      return VDP_STATUS_INVALID_HANDLE;
+   if (source_surface == VDP_INVALID_HANDLE) {
+      src_sv = dst_vlsurface->device->dummy_sv;

-   if (dst_vlsurface->device != src_vlsurface->device)
-      return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+   } else {
+      vlVdpBitmapSurface *src_vlsurface = vlGetDataHTAB(source_surface);
+      if (!src_vlsurface)
+         return VDP_STATUS_INVALID_HANDLE;
+
+      if (dst_vlsurface->device != src_vlsurface->device)
+         return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+
+      src_sv = src_vlsurface->sampler_view;
+   }

   context = dst_vlsurface->device->context;
   compositor = &dst_vlsurface->device->compositor;
@@ -721,7 +735,7 @@ vlVdpOutputSurfaceRenderBitmapSurface(VdpOutputSurface destination_surface,

   vl_compositor_clear_layers(cstate);
   vl_compositor_set_layer_blend(cstate, 0, blend, false);
-   vl_compositor_set_rgba_layer(cstate, compositor, 0, src_vlsurface->sampler_view,
+   vl_compositor_set_rgba_layer(cstate, compositor, 0, src_sv,
                                RectToPipe(source_rect, &src_rect), NULL,
                                ColorsToPipe(colors, flags, vlcolors));
   vl_compositor_set_layer_rotation(cstate, 0, flags & 3);
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -348,6 +348,7 @@ typedef struct
   struct vl_screen *vscreen;
   struct pipe_context *context;
   struct vl_compositor compositor;
+   struct pipe_sampler_view *dummy_sv;
   pipe_mutex mutex;

   struct {
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -26,7 +26,6 @@ gallium_dri_la_LDFLAGS = \
 	-shrext .so \
 	-module \
 	-avoid-version \
-	-Wl,--dynamic-list=$(top_srcdir)/src/gallium/targets/dri-vdpau.dyn \
 	$(GC_SECTIONS)

 if HAVE_LD_VERSION_SCRIPT
@@ -34,6 +33,11 @@ gallium_dri_la_LDFLAGS += \
 	-Wl,--version-script=$(top_srcdir)/src/gallium/targets/dri/dri.sym
 endif # HAVE_LD_VERSION_SCRIPT

+if HAVE_LD_DYNAMIC_LIST
+gallium_dri_la_LDFLAGS += \
+	-Wl,--dynamic-list=$(top_srcdir)/src/gallium/targets/dri-vdpau.dyn
+endif # HAVE_LD_DYNAMIC_LIST
+
 gallium_dri_la_LIBADD = \
 	$(top_builddir)/src/mesa/libmesagallium.la \
 	$(top_builddir)/src/mesa/drivers/dri/common/libdricommon.la \
--- a/src/gallium/targets/vdpau/Makefile.am
+++ b/src/gallium/targets/vdpau/Makefile.am
@@ -15,7 +15,6 @@ libvdpau_gallium_la_LDFLAGS = \
 	-module \
 	-no-undefined \
 	-version-number $(VDPAU_MAJOR):$(VDPAU_MINOR) \
-	-Wl,--dynamic-list=$(top_srcdir)/src/gallium/targets/dri-vdpau.dyn \
 	$(GC_SECTIONS) \
 	$(LD_NO_UNDEFINED)

@@ -24,6 +23,11 @@ libvdpau_gallium_la_LDFLAGS += \
 	-Wl,--version-script=$(top_srcdir)/src/gallium/targets/vdpau/vdpau.sym
 endif # HAVE_LD_VERSION_SCRIPT

+if HAVE_LD_DYNAMIC_LIST
+libvdpau_gallium_la_LDFLAGS += \
+	-Wl,--dynamic-list=$(top_srcdir)/src/gallium/targets/dri-vdpau.dyn
+endif # HAVE_LD_DYNAMIC_LIST
+
 libvdpau_gallium_la_LIBADD = \
 	$(top_builddir)/src/gallium/state_trackers/vdpau/libvdpautracker.la \
 	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
--- a/src/gallium/winsys/svga/drm/vmw_screen_dri.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen_dri.c
@@ -238,7 +238,7 @@ out_mip:

 static struct svga_winsys_surface *
 vmw_drm_surface_from_handle(struct svga_winsys_screen *sws,
-			    struct winsys_handle *whandle,
+                            struct winsys_handle *whandle,
 			    SVGA3dSurfaceFormat *format)
 {
    struct vmw_svga_winsys_surface *vsrf;
@@ -248,7 +248,8 @@ vmw_drm_surface_from_handle(struct svga_winsys_screen *sws,
    struct drm_vmw_surface_arg *req = &arg.req;
    struct drm_vmw_surface_create_req *rep = &arg.rep;
    uint32_t handle = 0;
-    SVGA3dSize size;
+    struct drm_vmw_size size;
+    SVGA3dSize base_size;
    int ret;
    int i;

@@ -274,7 +275,7 @@ vmw_drm_surface_from_handle(struct svga_winsys_screen *sws,

    memset(&arg, 0, sizeof(arg));
    req->sid = handle;
-    rep->size_addr = (size_t)&size;
+    rep->size_addr = (unsigned long)&size;

    ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_REF_SURFACE,
 			      &arg, sizeof(arg));
@@ -324,7 +325,11 @@ vmw_drm_surface_from_handle(struct svga_winsys_screen *sws,
    *format = rep->format;

    /* Estimate usage, for early flushing. */
-    vsrf->size = svga3dsurface_get_serialized_size(rep->format, size,
+
+    base_size.width = size.width;
+    base_size.height = size.height;
+    base_size.depth = size.depth;
+    vsrf->size = svga3dsurface_get_serialized_size(rep->format, base_size,
                                                   rep->mip_levels[0],
                                                   FALSE);

--- a/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/kms-dri/kms_dri_sw_winsys.c
@@ -38,6 +38,7 @@
 #include <sys/mman.h>
 #include <unistd.h>
 #include <dlfcn.h>
+#include <fcntl.h>
 #include <xf86drm.h>

 #include "pipe/p_compiler.h"
@@ -121,7 +122,7 @@ kms_sw_displaytarget_create(struct sw_winsys *ws,
   int ret;

   kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget);
-   if(!kms_sw_dt)
+   if (!kms_sw_dt)
      goto no_dt;

   kms_sw_dt->ref_count = 1;
@@ -210,6 +211,38 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
   return kms_sw_dt->mapped;
 }

+/* Import prime fd as a new display target on bo_list; NULL on failure. */
+static struct kms_sw_displaytarget *
+kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd)
+{
+   uint32_t handle = -1;
+   struct kms_sw_displaytarget * kms_sw_dt;
+   off_t size;
+
+   if (drmPrimeFDToHandle(kms_sw->fd, fd, &handle))
+      return NULL;
+
+   /* Keep the lseek() result in an off_t: once narrowed into the size
+    * field a failed seek may no longer compare equal to (off_t)-1.
+    */
+   size = lseek(fd, 0, SEEK_END);
+   if (size == (off_t)-1)
+      return NULL;
+   lseek(fd, 0, SEEK_SET);
+
+   kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget);
+   if (!kms_sw_dt)
+      return NULL;
+
+   kms_sw_dt->ref_count = 1;
+   kms_sw_dt->handle = handle;
+   kms_sw_dt->size = size;
+
+   list_add(&kms_sw_dt->link, &kms_sw->bo_list);
+
+   return kms_sw_dt;
+}
+
 static void
 kms_sw_displaytarget_unmap(struct sw_winsys *ws,
                           struct sw_displaytarget *dt)
@@ -231,17 +264,34 @@ kms_sw_displaytarget_from_handle(struct sw_winsys *ws,
   struct kms_sw_winsys *kms_sw = kms_sw_winsys(ws);
   struct kms_sw_displaytarget *kms_sw_dt;

-   assert(whandle->type == DRM_API_HANDLE_TYPE_KMS);
+   assert(whandle->type == DRM_API_HANDLE_TYPE_KMS ||
+          whandle->type == DRM_API_HANDLE_TYPE_FD);

-   LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
-      if (kms_sw_dt->handle == whandle->handle) {
+   switch(whandle->type) {
+   case DRM_API_HANDLE_TYPE_FD:
+      kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle);
+      if (kms_sw_dt) {
         kms_sw_dt->ref_count++;
-
-         DEBUG("KMS-DEBUG: imported buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size);
-
+         kms_sw_dt->width = templ->width0;
+         kms_sw_dt->height = templ->height0;
+         kms_sw_dt->stride = whandle->stride;
         *stride = kms_sw_dt->stride;
-         return (struct sw_displaytarget *)kms_sw_dt;
      }
+      return (struct sw_displaytarget *)kms_sw_dt;
+   case DRM_API_HANDLE_TYPE_KMS:
+      LIST_FOR_EACH_ENTRY(kms_sw_dt, &kms_sw->bo_list, link) {
+         if (kms_sw_dt->handle == whandle->handle) {
+            kms_sw_dt->ref_count++;
+
+            DEBUG("KMS-DEBUG: imported buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size);
+
+            *stride = kms_sw_dt->stride;
+            return (struct sw_displaytarget *)kms_sw_dt;
+         }
+      }
+      /* fallthrough */
+   default:
+      break;
   }

   assert(0);
@@ -253,16 +303,26 @@ kms_sw_displaytarget_get_handle(struct sw_winsys *winsys,
                                struct sw_displaytarget *dt,
                                struct winsys_handle *whandle)
 {
+   struct kms_sw_winsys *kms_sw = kms_sw_winsys(winsys);
   struct kms_sw_displaytarget *kms_sw_dt = kms_sw_displaytarget(dt);

-   if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
+   switch(whandle->type) {
+   case DRM_API_HANDLE_TYPE_KMS:
      whandle->handle = kms_sw_dt->handle;
      whandle->stride = kms_sw_dt->stride;
-   } else {
+      return TRUE;
+   case DRM_API_HANDLE_TYPE_FD:
+      if (!drmPrimeHandleToFD(kms_sw->fd, kms_sw_dt->handle,
+                             DRM_CLOEXEC, &whandle->handle)) {
+         whandle->stride = kms_sw_dt->stride;
+         return TRUE;
+      }
+      /* fallthrough */
+   default:
      whandle->handle = 0;
      whandle->stride = 0;
+      return FALSE;
   }
-   return TRUE;
 }

 static void
@@ -315,4 +375,4 @@ kms_dri_create_winsys(int fd)
   return &ws->base;
 }

-/* vim: set sw=3 ts=8 sts=3 expandtab: */
+/* vim: set sw=3 ts=8 sts=3 expandtab: */
--- a/src/glsl/Makefile.sources
+++ b/src/glsl/Makefile.sources
@@ -76,6 +76,7 @@ LIBGLSL_FILES = \
 	$(GLSL_SRCDIR)/lower_vec_index_to_swizzle.cpp \
 	$(GLSL_SRCDIR)/lower_vector.cpp \
 	$(GLSL_SRCDIR)/lower_vector_insert.cpp \
+	$(GLSL_SRCDIR)/lower_vertex_id.cpp \
 	$(GLSL_SRCDIR)/lower_output_reads.cpp \
 	$(GLSL_SRCDIR)/lower_ubo_reference.cpp \
 	$(GLSL_SRCDIR)/opt_algebraic.cpp \
--- a/src/glsl/glcpp/glcpp-lex.l
+++ b/src/glsl/glcpp/glcpp-lex.l
@@ -289,8 +289,14 @@ HEXADECIMAL_INTEGER	0[xX][0-9a-fA-F]+[uU]?
 }

 	/* Swallow empty #pragma directives, (to avoid confusing the
-	 * downstream compiler). */
-<HASH>pragma{HSPACE}*/{NEWLINE} {
+	 * downstream compiler).
+	 *
+	 * Note: We use a simple regular expression for the lookahead
+	 * here. Specifically, we cannot use the complete {NEWLINE} expression
+	 * since it uses alternation and we've found that there's a flex bug
+	 * where using alternation in the lookahead portion of a pattern
+	 * triggers a buffer overrun. */
+<HASH>pragma{HSPACE}*/[\r\n] {
 	BEGIN INITIAL;
 }

--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -125,6 +125,8 @@ bool optimize_redundant_jumps(exec_list *instructions);
 bool optimize_split_arrays(exec_list *instructions, bool linked);
 bool lower_offset_arrays(exec_list *instructions);

+bool lower_vertex_id(gl_shader *shader);
+
 ir_rvalue *
 compare_index_block(exec_list *instructions, ir_variable *index,
 		    unsigned base, unsigned components, void *mem_ctx);
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -1115,8 +1115,8 @@ move_non_declarations(exec_list *instructions, exec_node *last,
 /**
 * Get the function signature for main from a shader
 */
-static ir_function_signature *
-get_main_function_signature(gl_shader *sh)
+ir_function_signature *
+link_get_main_function_signature(gl_shader *sh)
 {
   ir_function *const f = sh->symbols->get_function("main");
   if (f != NULL) {
@@ -1644,7 +1644,7 @@ link_intrastage_shaders(void *mem_ctx,
    */
   gl_shader *main = NULL;
   for (unsigned i = 0; i < num_shaders; i++) {
-      if (get_main_function_signature(shader_list[i]) != NULL) {
+      if (link_get_main_function_signature(shader_list[i]) != NULL) {
 	 main = shader_list[i];
 	 break;
      }
@@ -1673,7 +1673,8 @@ link_intrastage_shaders(void *mem_ctx,
   /* The a pointer to the main function in the final linked shader (i.e., the
    * copy of the original shader that contained the main function).
    */
-   ir_function_signature *const main_sig = get_main_function_signature(linked);
+   ir_function_signature *const main_sig =
+      link_get_main_function_signature(linked);

   /* Move any instructions other than variable declarations or function
    * declarations into main.
@@ -1736,6 +1737,9 @@ link_intrastage_shaders(void *mem_ctx,
      }
   }

+   if (ctx->Const.VertexID_is_zero_based)
+      lower_vertex_id(linked);
+
   /* Make a pass over all variable declarations to ensure that arrays with
    * unspecified sizes have a size specified.  The size is inferred from the
    * max_array_access field.
--- a/src/glsl/linker.h
+++ b/src/glsl/linker.h
@@ -26,6 +26,9 @@
 #ifndef GLSL_LINKER_H
 #define GLSL_LINKER_H

+ir_function_signature *
+link_get_main_function_signature(gl_shader *sh);
+
 extern bool
 link_function_calls(gl_shader_program *prog, gl_shader *main,
 		    gl_shader **shader_list, unsigned num_shaders);
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -76,7 +76,7 @@ compare_index_block(exec_list *instructions, ir_variable *index,
   ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);

   assert(index->type->is_scalar());
-   assert(index->type->base_type == GLSL_TYPE_INT);
+   assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT);
   assert(components >= 1 && components <= 4);

   if (components > 1) {
--- a/src/glsl/lower_vertex_id.cpp
+++ b/src/glsl/lower_vertex_id.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_vertex_id.cpp
+ *
+ * There exists hardware, such as i965, that does not implement the OpenGL
+ * semantic for gl_VertexID.  Instead, that hardware does not include the
+ * value of basevertex in the gl_VertexID value.  To implement the OpenGL
+ * semantic, we'll have to convert gl_VertexID to
+ * gl_VertexIDMESA+gl_BaseVertexMESA.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir_hierarchical_visitor.h"
+#include "ir.h"
+#include "ir_builder.h"
+#include "linker.h"
+#include "program/prog_statevars.h"
+
+namespace {
+/* Visitor that redirects gl_VertexID dereferences to a lowered temporary. */
+class lower_vertex_id_visitor : public ir_hierarchical_visitor {
+public:
+   explicit lower_vertex_id_visitor(ir_function_signature *main_sig,
+                                    exec_list *ir_list)
+      : progress(false), VertexID(NULL), gl_VertexID(NULL),
+        gl_BaseVertex(NULL), main_sig(main_sig), ir_list(ir_list)
+   {
+      foreach_in_list(ir_instruction, ir, ir_list) {
+         ir_variable *const var = ir->as_variable();
+         /* Remember an existing base-vertex system value so it is reused. */
+         if (var != NULL && var->data.mode == ir_var_system_value &&
+             var->data.location == SYSTEM_VALUE_BASE_VERTEX) {
+            gl_BaseVertex = var;
+            break;
+         }
+      }
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *);
+   /* Becomes true once a gl_VertexID dereference has been rewritten. */
+   bool progress;
+
+private:
+   ir_variable *VertexID; /* "__VertexID" temporary; NULL until first use */
+   ir_variable *gl_VertexID; /* "gl_VertexIDMESA" value; NULL until first use */
+   ir_variable *gl_BaseVertex; /* base-vertex system value, found or created */
+
+   ir_function_signature *main_sig;
+   exec_list *ir_list;
+};
+
+} /* anonymous namespace */
+/* Point every gl_VertexID dereference at the "__VertexID" temporary. */
+ir_visitor_status
+lower_vertex_id_visitor::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->data.mode != ir_var_system_value ||
+       ir->var->data.location != SYSTEM_VALUE_VERTEX_ID)
+      return visit_continue;
+   /* First match only: create the replacement variables and the assignment. */
+   if (VertexID == NULL) {
+      const glsl_type *const int_t = glsl_type::int_type;
+      void *const mem_ctx = ralloc_parent(ir);
+      /* Temporary that takes the place of gl_VertexID in the shader. */
+      VertexID = new(mem_ctx) ir_variable(int_t, "__VertexID",
+                                          ir_var_temporary);
+      ir_list->push_head(VertexID);
+      /* System value located at SYSTEM_VALUE_VERTEX_ID_ZERO_BASE. */
+      gl_VertexID = new(mem_ctx) ir_variable(int_t, "gl_VertexIDMESA",
+                                             ir_var_system_value);
+      gl_VertexID->data.how_declared = ir_var_declared_implicitly;
+      gl_VertexID->data.read_only = true;
+      gl_VertexID->data.location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
+      gl_VertexID->data.explicit_location = true;
+      gl_VertexID->data.explicit_index = 0;
+      ir_list->push_head(gl_VertexID);
+      /* Declare the base-vertex value unless the constructor found one. */
+      if (gl_BaseVertex == NULL) {
+         gl_BaseVertex = new(mem_ctx) ir_variable(int_t, "gl_BaseVertex",
+                                                  ir_var_system_value);
+         gl_BaseVertex->data.how_declared = ir_var_declared_implicitly;
+         gl_BaseVertex->data.read_only = true;
+         gl_BaseVertex->data.location = SYSTEM_VALUE_BASE_VERTEX;
+         gl_BaseVertex->data.explicit_location = true;
+         gl_BaseVertex->data.explicit_index = 0;
+         ir_list->push_head(gl_BaseVertex);
+      }
+      /* __VertexID = gl_VertexIDMESA + gl_BaseVertex, at the head of main. */
+      ir_instruction *const inst =
+         ir_builder::assign(VertexID,
+                            ir_builder::add(gl_VertexID, gl_BaseVertex));
+
+      main_sig->body.push_head(inst);
+   }
+   /* Redirect this dereference to the temporary and record the change. */
+   ir->var = VertexID;
+   progress = true;
+
+   return visit_continue;
+}
+/* Lower gl_VertexID reads in a vertex shader; returns true on any change. */
+bool
+lower_vertex_id(gl_shader *shader)
+{
+   /* Only the vertex stage has gl_VertexID; leave other stages alone. */
+   const bool is_vertex_stage = shader->Stage == MESA_SHADER_VERTEX;
+   if (!is_vertex_stage)
+      return false;
+
+   ir_function_signature *const main_sig =
+      link_get_main_function_signature(shader);
+   /* No main(): trip the assert when enabled, otherwise report no change. */
+   if (!main_sig) {
+      assert(main_sig != NULL);
+      return false;
+   }
+
+   lower_vertex_id_visitor lowering(main_sig, shader->ir);
+   lowering.run(shader->ir);
+
+   return lowering.progress;
+}
--- a/src/glsl/opt_constant_folding.cpp
+++ b/src/glsl/opt_constant_folding.cpp
@@ -79,6 +79,11 @@ ir_constant_folding_visitor::handle_rvalue(ir_rvalue **rvalue)
      }
   }

+   /* Ditto for swizzles. */
+   ir_swizzle *swiz = (*rvalue)->as_swizzle();
+   if (swiz && !swiz->val->as_constant())
+      return;
+
   ir_constant *constant = (*rvalue)->constant_expression_value();
   if (constant) {
      *rvalue = constant;
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -396,25 +396,6 @@ _mesa_meta_init(struct gl_context *ctx)
   ctx->Meta = CALLOC_STRUCT(gl_meta_state);
 }

-static GLenum
-gl_buffer_index_to_drawbuffers_enum(gl_buffer_index bufindex)
-{
-   assert(bufindex < BUFFER_COUNT);
-
-   if (bufindex >= BUFFER_COLOR0)
-      return GL_COLOR_ATTACHMENT0 + bufindex - BUFFER_COLOR0;
-   else if (bufindex == BUFFER_FRONT_LEFT)
-      return GL_FRONT_LEFT;
-   else if (bufindex == BUFFER_FRONT_RIGHT)
-      return GL_FRONT_RIGHT;
-   else if (bufindex == BUFFER_BACK_LEFT)
-      return GL_BACK_LEFT;
-   else if (bufindex == BUFFER_BACK_RIGHT)
-      return GL_BACK_RIGHT;
-
-   return GL_NONE;
-}
-
 /**
 * Free context meta-op state.
 * To be called once during context destruction.
@@ -806,20 +787,9 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
   }

   if (state & MESA_META_DRAW_BUFFERS) {
-      int buf, real_color_buffers = 0;
-      memset(save->ColorDrawBuffers, 0, sizeof(save->ColorDrawBuffers));
-
-      for (buf = 0; buf < ctx->Const.MaxDrawBuffers; buf++) {
-         int buf_index = ctx->DrawBuffer->_ColorDrawBufferIndexes[buf];
-         if (buf_index == -1)
-            continue;
-
-         save->ColorDrawBuffers[buf] =
-            gl_buffer_index_to_drawbuffers_enum(buf_index);
-
-         if (++real_color_buffers >= ctx->DrawBuffer->_NumColorDrawBuffers)
-            break;
-      }
+      struct gl_framebuffer *fb = ctx->DrawBuffer;
+      memcpy(save->ColorDrawBuffers, fb->ColorDrawBuffer,
+             sizeof(save->ColorDrawBuffers));
   }

   /* misc */
@@ -1224,7 +1194,7 @@ _mesa_meta_end(struct gl_context *ctx)
      _mesa_BindRenderbuffer(GL_RENDERBUFFER, save->RenderbufferName);

   if (state & MESA_META_DRAW_BUFFERS) {
-      _mesa_DrawBuffers(ctx->Const.MaxDrawBuffers, save->ColorDrawBuffers);
+      _mesa_drawbuffers(ctx, ctx->Const.MaxDrawBuffers, save->ColorDrawBuffers, NULL);
   }

   ctx->Meta->SaveStackDepth--;
--- a/src/mesa/drivers/common/meta_copy_image.c
+++ b/src/mesa/drivers/common/meta_copy_image.c
@@ -74,7 +74,7 @@ make_view(struct gl_context *ctx, struct gl_texture_image *tex_image,
                              tex_image->Depth,
                              0, internal_format, tex_format);

-   view_tex_obj->MinLevel = 0;
+   view_tex_obj->MinLevel = tex_image->Level;
   view_tex_obj->NumLevels = 1;
   view_tex_obj->MinLayer = tex_obj->MinLayer;
   view_tex_obj->NumLayers = tex_obj->NumLayers;
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -78,7 +78,7 @@ void
 brw_blorp_surface_info::set(struct brw_context *brw,
                            struct intel_mipmap_tree *mt,
                            unsigned int level, unsigned int layer,
-                            bool is_render_target)
+                            mesa_format format, bool is_render_target)
 {
   brw_blorp_mip_info::set(mt, level, layer);
   this->num_samples = mt->num_samples;
@@ -86,7 +86,10 @@ brw_blorp_surface_info::set(struct brw_context *brw,
   this->map_stencil_as_y_tiled = false;
   this->msaa_layout = mt->msaa_layout;

-   switch (mt->format) {
+   if (format == MESA_FORMAT_NONE)
+      format = mt->format;
+
+   switch (format) {
   case MESA_FORMAT_S_UINT8:
      /* The miptree is a W-tiled stencil buffer.  Surface states can't be set
       * up for W tiling, so we'll need to use Y tiling and have the WM
@@ -115,7 +118,7 @@ brw_blorp_surface_info::set(struct brw_context *brw,
      this->brw_surfaceformat = BRW_SURFACEFORMAT_R16_UNORM;
      break;
   default: {
-      mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
+      mesa_format linear_format = _mesa_get_srgb_format_linear(format);
      if (is_render_target) {
         assert(brw->format_supported_as_render_target[linear_format]);
         this->brw_surfaceformat = brw->render_target_format[linear_format];
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -39,8 +39,10 @@ void
 brw_blorp_blit_miptrees(struct brw_context *brw,
                        struct intel_mipmap_tree *src_mt,
                        unsigned src_level, unsigned src_layer,
+                        mesa_format src_format,
                        struct intel_mipmap_tree *dst_mt,
                        unsigned dst_level, unsigned dst_layer,
+                        mesa_format dst_format,
                        float src_x0, float src_y0,
                        float src_x1, float src_y1,
                        float dst_x0, float dst_y0,
@@ -121,7 +123,7 @@ public:
   void set(struct brw_context *brw,
            struct intel_mipmap_tree *mt,
            unsigned int level, unsigned int layer,
-            bool is_render_target);
+            mesa_format format, bool is_render_target);

   uint32_t compute_tile_offsets(uint32_t *tile_x, uint32_t *tile_y) const;

@@ -346,8 +348,10 @@ public:
   brw_blorp_blit_params(struct brw_context *brw,
                         struct intel_mipmap_tree *src_mt,
                         unsigned src_level, unsigned src_layer,
+                         mesa_format src_format,
                         struct intel_mipmap_tree *dst_mt,
                         unsigned dst_level, unsigned dst_layer,
+                         mesa_format dst_format,
                         GLfloat src_x0, GLfloat src_y0,
                         GLfloat src_x1, GLfloat src_y1,
                         GLfloat dst_x0, GLfloat dst_y0,
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -56,8 +56,10 @@ void
 brw_blorp_blit_miptrees(struct brw_context *brw,
                        struct intel_mipmap_tree *src_mt,
                        unsigned src_level, unsigned src_layer,
+                        mesa_format src_format,
                        struct intel_mipmap_tree *dst_mt,
                        unsigned dst_level, unsigned dst_layer,
+                        mesa_format dst_format,
                        float src_x0, float src_y0,
                        float src_x1, float src_y1,
                        float dst_x0, float dst_y0,
@@ -84,8 +86,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
       mirror_x, mirror_y);

   brw_blorp_blit_params params(brw,
-                                src_mt, src_level, src_layer,
-                                dst_mt, dst_level, dst_layer,
+                                src_mt, src_level, src_layer, src_format,
+                                dst_mt, dst_level, dst_layer, dst_format,
                                src_x0, src_y0,
                                src_x1, src_y1,
                                dst_x0, dst_y0,
@@ -98,8 +100,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,

 static void
 do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
-              struct intel_renderbuffer *src_irb,
-              struct intel_renderbuffer *dst_irb,
+              struct intel_renderbuffer *src_irb, mesa_format src_format,
+              struct intel_renderbuffer *dst_irb, mesa_format dst_format,
              GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
              GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
              GLenum filter, bool mirror_x, bool mirror_y)
@@ -111,7 +113,9 @@ do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
   /* Do the blit */
   brw_blorp_blit_miptrees(brw,
                           src_mt, src_irb->mt_level, src_irb->mt_layer,
+                           src_format,
                           dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
+                           dst_format,
                           srcX0, srcY0, srcX1, srcY1,
                           dstX0, dstY0, dstX1, dstY1,
                           filter, mirror_x, mirror_y);
@@ -153,8 +157,11 @@ try_blorp_blit(struct brw_context *brw,
      for (unsigned i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; ++i) {
         dst_irb = intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
 	 if (dst_irb)
-            do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
-                          srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
+            do_blorp_blit(brw, buffer_bit,
+                          src_irb, src_irb->Base.Base.Format,
+                          dst_irb, dst_irb->Base.Base.Format,
+                          srcX0, srcY0, srcX1, srcY1,
+                          dstX0, dstY0, dstX1, dstY1,
                          filter, mirror_x, mirror_y);
      }
      break;
@@ -174,7 +181,8 @@ try_blorp_blit(struct brw_context *brw,
          (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT))
         return false;

-      do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
+      do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE,
+                    dst_irb, MESA_FORMAT_NONE, srcX0, srcY0,
                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
                    filter, mirror_x, mirror_y);
      break;
@@ -183,7 +191,8 @@ try_blorp_blit(struct brw_context *brw,
         intel_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
      dst_irb =
         intel_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
-      do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
+      do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE,
+                    dst_irb, MESA_FORMAT_NONE, srcX0, srcY0,
                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
                    filter, mirror_x, mirror_y);
      break;
@@ -219,8 +228,8 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
   if (brw->gen < 6 || brw->gen >= 8)
      return false;

-   if (_mesa_get_format_base_format(src_mt->format) !=
-       _mesa_get_format_base_format(dst_mt->format)) {
+   if (_mesa_get_format_base_format(src_rb->Format) !=
+       _mesa_get_format_base_format(dst_image->TexFormat)) {
      return false;
   }

@@ -233,7 +242,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
      return false;
   }

-   if (!brw->format_supported_as_render_target[dst_mt->format])
+   if (!brw->format_supported_as_render_target[dst_image->TexFormat])
      return false;

   /* Source clipping shouldn't be necessary, since copytexsubimage (in
@@ -268,7 +277,9 @@ brw_blorp_copytexsubimage(struct brw_context *brw,

   brw_blorp_blit_miptrees(brw,
                           src_mt, src_irb->mt_level, src_irb->mt_layer,
+                           src_rb->Format,
                           dst_mt, dst_level, dst_slice,
+                           dst_image->TexFormat,
                           srcX0, srcY0, srcX1, srcY1,
                           dstX0, dstY0, dstX1, dstY1,
                           GL_NEAREST, false, mirror_y);
@@ -291,7 +302,9 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
      if (src_mt != dst_mt) {
         brw_blorp_blit_miptrees(brw,
                                 src_mt, src_irb->mt_level, src_irb->mt_layer,
+                                 src_mt->format,
                                 dst_mt, dst_level, dst_slice,
+                                 dst_mt->format,
                                 srcX0, srcY0, srcX1, srcY1,
                                 dstX0, dstY0, dstX1, dstY1,
                                 GL_NEAREST, false, mirror_y);
@@ -1822,8 +1835,10 @@ compute_msaa_layout_for_pipeline(struct brw_context *brw, unsigned num_samples,
 brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
                                             struct intel_mipmap_tree *src_mt,
                                             unsigned src_level, unsigned src_layer,
+                                             mesa_format src_format,
                                             struct intel_mipmap_tree *dst_mt,
                                             unsigned dst_level, unsigned dst_layer,
+                                             mesa_format dst_format,
                                             GLfloat src_x0, GLfloat src_y0,
                                             GLfloat src_x1, GLfloat src_y1,
                                             GLfloat dst_x0, GLfloat dst_y0,
@@ -1831,8 +1846,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
                                             GLenum filter,
                                             bool mirror_x, bool mirror_y)
 {
-   src.set(brw, src_mt, src_level, src_layer, false);
-   dst.set(brw, dst_mt, dst_level, dst_layer, true);
+   src.set(brw, src_mt, src_level, src_layer, src_format, false);
+   dst.set(brw, dst_mt, dst_level, dst_layer, dst_format, true);

   /* Even though we do multisample resolves at the time of the blit, OpenGL
    * specification defines them as if they happen at the time of rendering,
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -483,6 +483,7 @@ brw_initialize_context_constants(struct brw_context *brw)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
+   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -553,6 +553,7 @@ struct brw_vs_prog_data {
   GLbitfield64 inputs_read;

   bool uses_vertexid;
+   bool uses_instanceid;
 };


@@ -1061,6 +1062,21 @@ struct brw_context
   /* Whether the last depth/stencil packets were both NULL. */
   bool no_depth_or_stencil;

+   struct {
+      /** Does the current draw use the index buffer? */
+      bool indexed;
+
+      int start_vertex_location;
+      int base_vertex_location;
+
+      /**
+       * Buffer and offset used for GL_ARB_shader_draw_parameters
+       * (for now, only gl_BaseVertex).
+       */
+      drm_intel_bo *draw_params_bo;
+      uint32_t draw_params_offset;
+   } draw;
+
   struct {
      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
      struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -176,26 +176,19 @@ static void brw_emit_prim(struct brw_context *brw,
 {
   int verts_per_instance;
   int vertex_access_type;
-   int start_vertex_location;
-   int base_vertex_location;
   int indirect_flag;

   DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
       prim->start, prim->count);

-   start_vertex_location = prim->start;
-   base_vertex_location = prim->basevertex;
   if (prim->indexed) {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
-      start_vertex_location += brw->ib.start_vertex_offset;
-      base_vertex_location += brw->vb.start_vertex_bias;
   } else {
      vertex_access_type = brw->gen >= 7 ?
         GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL :
         GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
-      start_vertex_location += brw->vb.start_vertex_bias;
   }

   /* We only need to trim the primitive count on pre-Gen6. */
@@ -270,10 +263,10 @@ static void brw_emit_prim(struct brw_context *brw,
                vertex_access_type);
   }
   OUT_BATCH(verts_per_instance);
-   OUT_BATCH(start_vertex_location);
+   OUT_BATCH(brw->draw.start_vertex_location);
   OUT_BATCH(prim->num_instances);
   OUT_BATCH(prim->base_instance);
-   OUT_BATCH(base_vertex_location);
+   OUT_BATCH(brw->draw.base_vertex_location);
   ADVANCE_BATCH();

   /* Only used on Sandybridge; harmless to set elsewhere. */
@@ -436,12 +429,35 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
            brw_merge_inputs(brw, arrays);
         }
      }
+
+      brw->draw.indexed = prims[i].indexed;
+      brw->draw.start_vertex_location = prims[i].start;
+      brw->draw.base_vertex_location = prims[i].basevertex;
+
+      drm_intel_bo_unreference(brw->draw.draw_params_bo);
+
+      if (prims[i].is_indirect) {
+         /* Point draw_params_bo at the indirect buffer. */
+         brw->draw.draw_params_bo =
+            intel_buffer_object(ctx->DrawIndirectBuffer)->buffer;
+         drm_intel_bo_reference(brw->draw.draw_params_bo);
+         brw->draw.draw_params_offset =
+            prims[i].indirect_offset + (prims[i].indexed ? 12 : 8);
+      } else {
+         /* Set draw_params_bo to NULL so brw_prepare_vertices knows it
+          * has to upload gl_BaseVertex and such if they're needed.
+          */
+         brw->draw.draw_params_bo = NULL;
+         brw->draw.draw_params_offset = 0;
+      }
+
      if (brw->gen < 6)
 	 brw_set_prim(brw, &prims[i]);
      else
 	 gen6_set_prim(brw, &prims[i]);

 retry:
+
      /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
       * that the state updated in the loop outside of this block is that in
       * *_set_prim or intel_batchbuffer_flush(), which only impacts
--- a/src/mesa/drivers/dri/i965/brw_draw.h
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -47,6 +47,8 @@ void brw_draw_prims( struct gl_context *ctx,
 void brw_draw_init( struct brw_context *brw );
 void brw_draw_destroy( struct brw_context *brw );

+void brw_prepare_shader_draw_parameters(struct brw_context *);
+
 /* brw_primitive_restart.c */
 GLboolean
 brw_handle_primitive_restart(struct gl_context *ctx,
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -604,16 +604,83 @@ brw_prepare_vertices(struct brw_context *brw)
   brw->vb.nr_buffers = j;
 }

-static void brw_emit_vertices(struct brw_context *brw)
+void
+brw_prepare_shader_draw_parameters(struct brw_context *brw)
+{
+   int *gl_basevertex_value; /* field that is uploaded as gl_BaseVertex */
+   if (brw->draw.indexed) { /* indexed: gl_BaseVertex is the base vertex */
+      brw->draw.start_vertex_location += brw->ib.start_vertex_offset;
+      brw->draw.base_vertex_location += brw->vb.start_vertex_bias;
+      gl_basevertex_value = &brw->draw.base_vertex_location;
+   } else { /* non-indexed: gl_BaseVertex is the start vertex */
+      brw->draw.start_vertex_location += brw->vb.start_vertex_bias;
+      gl_basevertex_value = &brw->draw.start_vertex_location;
+   }
+
+   /* Upload gl_BaseVertex only if the VS reads it and no BO is set yet. */
+   if (brw->vs.prog_data->uses_vertexid && brw->draw.draw_params_bo == NULL) {
+      intel_upload_data(brw, gl_basevertex_value, 4, 4, /* presumably size, align - confirm */
+			&brw->draw.draw_params_bo,
+                        &brw->draw.draw_params_offset);
+   }
+}
+
+/**
+ * Emit one four-dword VERTEX_BUFFER_STATE entry (part of
+ * 3DSTATE_VERTEX_BUFFERS); the caller owns BEGIN_BATCH/ADVANCE_BATCH. */
+static void
+emit_vertex_buffer_state(struct brw_context *brw,
+                         unsigned buffer_nr, /* index field of dw0 */
+                         drm_intel_bo *bo, /* relocation target */
+                         unsigned bo_ending_address, /* end reloc offset (gen5+) */
+                         unsigned bo_offset, /* start reloc offset */
+                         unsigned stride, /* pitch field of dw0 */
+                         unsigned step_rate) /* 0 selects VERTEXDATA access */
 {
   struct gl_context *ctx = &brw->ctx;
-   GLuint i, nr_elements;
+   uint32_t dw0;
+
+   if (brw->gen >= 6) {
+      dw0 = (buffer_nr << GEN6_VB0_INDEX_SHIFT) |
+            (step_rate ? GEN6_VB0_ACCESS_INSTANCEDATA
+                       : GEN6_VB0_ACCESS_VERTEXDATA);
+   } else {
+      dw0 = (buffer_nr << BRW_VB0_INDEX_SHIFT) |
+            (step_rate ? BRW_VB0_ACCESS_INSTANCEDATA
+                       : BRW_VB0_ACCESS_VERTEXDATA);
+   }
+
+   if (brw->gen >= 7)
+      dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+   if (brw->gen == 7) /* exactly gen7: OR the L3 MOCS value into dw0 */
+      dw0 |= GEN7_MOCS_L3 << 16;
+
+   WARN_ONCE(stride >= (brw->gen >= 5 ? 2048 : 2047),
+             "VBO stride %d too large, bad rendering may occur\n",
+             stride);
+   OUT_BATCH(dw0 | (stride << BRW_VB0_PITCH_SHIFT));
+   OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_offset);
+   if (brw->gen >= 5) { /* gen5+ gets an end-address reloc; older gens get 0 */
+      OUT_RELOC(bo, I915_GEM_DOMAIN_VERTEX, 0, bo_ending_address);
+   } else {
+      OUT_BATCH(0);
+   }
+   OUT_BATCH(step_rate);
+}
+
+static void brw_emit_vertices(struct brw_context *brw)
+{
+   GLuint i;

   brw_prepare_vertices(brw);
+   brw_prepare_shader_draw_parameters(brw);

   brw_emit_query_begin(brw);

-   nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   unsigned nr_elements = brw->vb.nr_enabled;
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid)
+      ++nr_elements;

   /* If the VS doesn't read any inputs (calculating vertex position from
    * a state variable for some reason, for example), emit a single pad
@@ -647,47 +714,33 @@ static void brw_emit_vertices(struct brw_context *brw)
   /* Now emit VB and VEP state packets.
    */

-   if (brw->vb.nr_buffers) {
+   unsigned nr_buffers =
+      brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
+
+   if (nr_buffers) {
      if (brw->gen >= 6) {
-	 assert(brw->vb.nr_buffers <= 33);
+	 assert(nr_buffers <= 33);
      } else {
-	 assert(brw->vb.nr_buffers <= 17);
+	 assert(nr_buffers <= 17);
      }

-      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
-      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
+      BEGIN_BATCH(1 + 4 * nr_buffers);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
      for (i = 0; i < brw->vb.nr_buffers; i++) {
 	 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
-	 uint32_t dw0;
+         emit_vertex_buffer_state(brw, i, buffer->bo, buffer->bo->size - 1,
+                                  buffer->offset, buffer->stride,
+                                  buffer->step_rate);

-	 if (brw->gen >= 6) {
-	    dw0 = buffer->step_rate
-	             ? GEN6_VB0_ACCESS_INSTANCEDATA
-	             : GEN6_VB0_ACCESS_VERTEXDATA;
-	    dw0 |= i << GEN6_VB0_INDEX_SHIFT;
-	 } else {
-	    dw0 = buffer->step_rate
-	             ? BRW_VB0_ACCESS_INSTANCEDATA
-	             : BRW_VB0_ACCESS_VERTEXDATA;
-	    dw0 |= i << BRW_VB0_INDEX_SHIFT;
-	 }
+      }

-	 if (brw->gen >= 7)
-	    dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
-
-         if (brw->gen == 7)
-	    dw0 |= GEN7_MOCS_L3 << 16;
-
-         WARN_ONCE(buffer->stride >= (brw->gen >= 5 ? 2048 : 2047),
-                   "VBO stride %d too large, bad rendering may occur\n",
-                   buffer->stride);
-	 OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT));
-	 OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
-	 if (brw->gen >= 5) {
-	    OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1);
-	 } else
-	    OUT_BATCH(0);
-	 OUT_BATCH(buffer->step_rate);
+      if (brw->vs.prog_data->uses_vertexid) {
+         emit_vertex_buffer_state(brw, brw->vb.nr_buffers,
+                                  brw->draw.draw_params_bo,
+                                  brw->draw.draw_params_bo->size - 1,
+                                  brw->draw.draw_params_offset,
+                                  0,  /* stride */
+                                  0); /* step rate */
      }
      ADVANCE_BATCH();
   }
@@ -773,18 +826,35 @@ static void brw_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }

-   if (brw->vs.prog_data->uses_vertexid) {
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_0;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_0;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_0;

-      dw1 = ((BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT) |
-	     (BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT) |
-	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-	     (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      if (brw->vs.prog_data->uses_vertexid) {
+         comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+         comp2 = BRW_VE1_COMPONENT_STORE_VID;
+      }
+
+      if (brw->vs.prog_data->uses_instanceid) {
+         comp3 = BRW_VE1_COMPONENT_STORE_IID;
+      }
+
+      dw1 = (comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+            (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+            (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+            (comp3 << BRW_VE1_COMPONENT_3_SHIFT);

      if (brw->gen >= 6) {
-	 dw0 |= GEN6_VE0_VALID;
+         dw0 |= GEN6_VE0_VALID |
+                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
      } else {
-	 dw0 |= BRW_VE0_VALID;
+         dw0 |= BRW_VE0_VALID |
+                brw->vb.nr_buffers << BRW_VE0_INDEX_SHIFT |
+                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT;
 	 dw1 |= (i * 4) << BRW_VE1_DST_OFFSET_SHIFT;
      }

--- a/src/mesa/drivers/dri/i965/brw_eu_compact.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_compact.c
@@ -1029,19 +1029,17 @@ update_uip_jip(struct brw_context *brw, brw_inst *insn,
 {
   int scale = brw->gen >= 8 ? sizeof(brw_compact_inst) : 1;

-   int32_t jip = brw_inst_jip(brw, insn);
-   jip -= scale *
-      compacted_between(this_old_ip, this_old_ip + jip, compacted_counts);
-   brw_inst_set_jip(brw, insn, jip);
+   int32_t jip = brw_inst_jip(brw, insn) / scale;
+   jip -= compacted_between(this_old_ip, this_old_ip + jip, compacted_counts);
+   brw_inst_set_jip(brw, insn, jip * scale);

   if (brw_inst_opcode(brw, insn) == BRW_OPCODE_ENDIF ||
       brw_inst_opcode(brw, insn) == BRW_OPCODE_WHILE)
      return;

-   int32_t uip = brw_inst_uip(brw, insn);
-   uip -= scale *
-      compacted_between(this_old_ip, this_old_ip + uip, compacted_counts);
-   brw_inst_set_uip(brw, insn, uip);
+   int32_t uip = brw_inst_uip(brw, insn) / scale;
+   uip -= compacted_between(this_old_ip, this_old_ip + uip, compacted_counts);
+   brw_inst_set_uip(brw, insn, uip * scale);
 }

 void
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1759,16 +1759,25 @@ fs_visitor::compact_virtual_grfs()
   }

   /* Patch all the references to delta_x/delta_y, since they're used in
-    * register allocation.
+    * register allocation.  If they're unused, switch them to BAD_FILE so
+    * we don't think some random VGRF is delta_x/delta_y.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
-      if (delta_x[i].file == GRF && remap_table[delta_x[i].reg] != -1) {
-         delta_x[i].reg = remap_table[delta_x[i].reg];
+      if (delta_x[i].file == GRF) {
+         if (remap_table[delta_x[i].reg] != -1) {
+            delta_x[i].reg = remap_table[delta_x[i].reg];
+         } else {
+            delta_x[i].file = BAD_FILE;
+         }
      }
   }
   for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
-      if (delta_y[i].file == GRF && remap_table[delta_y[i].reg] != -1) {
-         delta_y[i].reg = remap_table[delta_y[i].reg];
+      if (delta_y[i].file == GRF) {
+         if (remap_table[delta_y[i].reg] != -1) {
+            delta_y[i].reg = remap_table[delta_y[i].reg];
+         } else {
+            delta_y[i].file = BAD_FILE;
+         }
      }
   }
 }
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -458,6 +458,7 @@ fs_visitor::assign_regs(bool allow_spilling)
       * that register and set it to the appropriate class.
       */
      if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 &&
+          this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF &&
          this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
         c = screen->wm_reg_sets[rsi].aligned_pairs_class;
      }
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -109,10 +109,10 @@ fs_visitor::visit(ir_variable *ir)
       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
       * variables, so no need for them to be in variable_ht.
       *
-       * Atomic counters take no uniform storage, no need to do
-       * anything here.
+       * Some uniforms, such as samplers and atomic counters, have no actual
+       * storage, so we should ignore them.
       */
-      if (ir->is_in_uniform_block() || ir->type->contains_atomic())
+      if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
         return;

      if (dispatch_width == 16) {
@@ -2238,7 +2238,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
 {
   ir_expression *expr = ir->as_expression();

-   if (!expr) {
+   if (!expr || expr->operation == ir_binop_ubo_load) {
      ir->accept(this);

      fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
@@ -2246,10 +2246,10 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
      return;
   }

-   fs_reg op[2];
+   fs_reg op[3];
   fs_inst *inst;

-   assert(expr->get_num_operands() <= 2);
+   assert(expr->get_num_operands() <= 3);
   for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
      assert(expr->operands[i]->type->is_scalar());

@@ -2336,6 +2336,22 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
               brw_conditional_for_comparison(expr->operation)));
      break;

+   case ir_triop_csel: {
+      /* Expand the boolean condition into the flag register. */
+      inst = emit(MOV(reg_null_d, op[0]));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+      /* Select which boolean to return. */
+      fs_reg temp(this, expr->operands[1]->type);
+      inst = emit(SEL(temp, op[1], op[2]));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+
+      /* Expand the result to a condition code. */
+      inst = emit(MOV(reg_null_d, temp));
+      inst->conditional_mod = BRW_CONDITIONAL_NZ;
+      break;
+   }
+
   default:
      unreachable("not reached");
   }
@@ -2350,12 +2366,12 @@ fs_visitor::emit_if_gen6(ir_if *ir)
 {
   ir_expression *expr = ir->condition->as_expression();

-   if (expr) {
-      fs_reg op[2];
+   if (expr && expr->operation != ir_binop_ubo_load) {
+      fs_reg op[3];
      fs_inst *inst;
      fs_reg temp;

-      assert(expr->get_num_operands() <= 2);
+      assert(expr->get_num_operands() <= 3);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 assert(expr->operands[i]->type->is_scalar());

@@ -2399,6 +2415,21 @@ fs_visitor::emit_if_gen6(ir_if *ir)
 	 emit(IF(op[0], op[1],
                 brw_conditional_for_comparison(expr->operation)));
 	 return;
+
+      case ir_triop_csel: {
+         /* Expand the boolean condition into the flag register. */
+         fs_inst *inst = emit(MOV(reg_null_d, op[0]));
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+         /* Select which boolean to use as the result. */
+         fs_reg temp(this, expr->operands[1]->type);
+         inst = emit(SEL(temp, op[1], op[2]));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+	 emit(IF(temp, fs_reg(0), BRW_CONDITIONAL_NZ));
+	 return;
+      }
+
      default:
 	 unreachable("not reached");
      }
--- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
+++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
@@ -282,6 +282,7 @@ get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
       * factor is 2 vertically and either 2 or 8 horizontally.
       */
      switch (irb->mt->num_samples) {
+      case 2:
      case 4:
         x_scaledown = 8;
         break;
@@ -641,13 +642,19 @@ get_resolve_rect(struct brw_context *brw,
    *     with respect to render target being resolved.
    *
    * The scaledown factors in the table that follows are related to the
-    * alignment size returned by intel_get_non_msrt_mcs_alignment(), but with
-    * X and Y alignment each divided by 2.
+    * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
+    * multiplier.  For IVB and HSW, we divide both by two; for BDW we
+    * multiply X by 8 and Y by 16.
    */

   intel_get_non_msrt_mcs_alignment(brw, mt, &x_align, &y_align);
-   x_scaledown = x_align / 2;
-   y_scaledown = y_align / 2;
+   if (brw->gen >= 8) {
+      x_scaledown = x_align * 8;
+      y_scaledown = y_align * 16;
+   } else {
+      x_scaledown = x_align / 2;
+      y_scaledown = y_align / 2;
+   }
   rect->x0 = rect->y0 = 0;
   rect->x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown;
   rect->y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown;
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -98,7 +98,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
   &brw_psp_urb_cbs,

   &brw_drawing_rect,
-   &brw_indices,
+   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

@@ -169,7 +169,7 @@ static const struct brw_tracked_state *gen6_atoms[] =

   &brw_drawing_rect,

-   &brw_indices,
+   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
 };
@@ -244,7 +244,7 @@ static const struct brw_tracked_state *gen7_atoms[] =

   &brw_drawing_rect,

-   &brw_indices,
+   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -933,9 +933,9 @@ vec4_visitor::opt_set_dependency_control()
 }

 bool
-vec4_instruction::can_reswizzle_dst(int dst_writemask,
-                                    int swizzle,
-                                    int swizzle_mask)
+vec4_instruction::can_reswizzle(int dst_writemask,
+                                int swizzle,
+                                int swizzle_mask)
 {
   /* If this instruction sets anything not referenced by swizzle, then we'd
    * totally break it when we reswizzle.
@@ -977,9 +977,10 @@ vec4_instruction::can_reswizzle_dst(int dst_writemask,
 * e.g. for swizzle=yywx, MUL a.xy b c -> MUL a.yy_x b.yy z.yy_x
 */
 void
-vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
+vec4_instruction::reswizzle(int dst_writemask, int swizzle)
 {
   int new_writemask = 0;
+   int new_swizzle[4] = { 0 };

   switch (opcode) {
   default:
@@ -996,6 +997,19 @@ vec4_instruction::reswizzle_dst(int dst_writemask, int swizzle)
         }
         break;
      }
+
+      for (int i = 0; i < 3; i++) {
+         if (src[i].file == BAD_FILE || src[i].file == IMM)
+            continue;
+
+         for (int c = 0; c < 4; c++) {
+            new_swizzle[c] = BRW_GET_SWZ(src[i].swizzle, BRW_GET_SWZ(swizzle, c));
+         }
+
+         src[i].swizzle = BRW_SWIZZLE4(new_swizzle[0], new_swizzle[1],
+                                       new_swizzle[2], new_swizzle[3]);
+      }
+
      /* fallthrough */
   case BRW_OPCODE_DP4:
   case BRW_OPCODE_DP3:
@@ -1102,9 +1116,9 @@ vec4_visitor::opt_register_coalesce()
            }

            /* If we can't handle the swizzle, bail. */
-            if (!scan_inst->can_reswizzle_dst(inst->dst.writemask,
-                                              inst->src[0].swizzle,
-                                              swizzle_mask)) {
+            if (!scan_inst->can_reswizzle(inst->dst.writemask,
+                                          inst->src[0].swizzle,
+                                          swizzle_mask)) {
               break;
            }

@@ -1190,8 +1204,8 @@ vec4_visitor::opt_register_coalesce()
 	    if (scan_inst->dst.file == GRF &&
 		scan_inst->dst.reg == inst->src[0].reg &&
 		scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
-               scan_inst->reswizzle_dst(inst->dst.writemask,
-                                        inst->src[0].swizzle);
+               scan_inst->reswizzle(inst->dst.writemask,
+                                    inst->src[0].swizzle);
 	       scan_inst->dst.file = inst->dst.file;
 	       scan_inst->dst.reg = inst->dst.reg;
 	       scan_inst->dst.reg_offset = inst->dst.reg_offset;
@@ -1552,7 +1566,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
    * don't represent it with a flag in inputs_read, so we call it
    * VERT_ATTRIB_MAX.
    */
-   if (vs_prog_data->uses_vertexid) {
+   if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
      attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
      nr_attributes++;
   }
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -220,8 +220,8 @@ public:
   bool header_present;

   bool is_send_from_grf();
-   bool can_reswizzle_dst(int dst_writemask, int swizzle, int swizzle_mask);
-   void reswizzle_dst(int dst_writemask, int swizzle);
+   bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
+   void reswizzle(int dst_writemask, int swizzle);
   bool can_do_source_mods(struct brw_context *brw);

   bool reads_flag()
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -224,14 +224,18 @@ vec4_visitor::opt_cse_local(bblock_t *block)
            /* Kill any AEB entries using registers that don't get reused any
             * more -- a sure sign they'll fail operands_match().
             */
-            int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
-                                         virtual_grf_end[src->reg * 4 + 1]),
-                                    MAX2(virtual_grf_end[src->reg * 4 + 2],
-                                         virtual_grf_end[src->reg * 4 + 3]));
-            if (src->file == GRF && last_reg_use < ip) {
-               entry->remove();
-               ralloc_free(entry);
-               break;
+            if (src->file == GRF) {
+               assert((src->reg * 4 + 3) < (virtual_grf_count * 4));
+
+               int last_reg_use = MAX2(MAX2(virtual_grf_end[src->reg * 4 + 0],
+                                            virtual_grf_end[src->reg * 4 + 1]),
+                                       MAX2(virtual_grf_end[src->reg * 4 + 2],
+                                            virtual_grf_end[src->reg * 4 + 3]));
+               if (last_reg_use < ip) {
+                  entry->remove();
+                  ralloc_free(entry);
+                  break;
+               }
            }
         }
      }
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -588,10 +588,10 @@ type_size(const struct glsl_type *type)
      }
      return size;
   case GLSL_TYPE_SAMPLER:
-      /* Samplers take up one slot in UNIFORMS[], but they're baked in
-       * at link time.
+      /* Samplers take up no register space, since they're baked in at
+       * link time.
       */
-      return 1;
+      return 0;
   case GLSL_TYPE_ATOMIC_UINT:
      return 0;
   case GLSL_TYPE_IMAGE:
@@ -776,11 +776,11 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,

   *predicate = BRW_PREDICATE_NORMAL;

-   if (expr) {
-      src_reg op[2];
+   if (expr && expr->operation != ir_binop_ubo_load) {
+      src_reg op[3];
      vec4_instruction *inst;

-      assert(expr->get_num_operands() <= 2);
+      assert(expr->get_num_operands() <= 3);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
@@ -852,6 +852,22 @@ vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,
 		  brw_conditional_for_comparison(expr->operation)));
 	 break;

+      case ir_triop_csel: {
+         /* Expand the boolean condition into the flag register. */
+         inst = emit(MOV(dst_null_d(), op[0]));
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+         /* Select which boolean to return. */
+         dst_reg temp(this, expr->operands[1]->type);
+         inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+         /* Expand the result to a condition code. */
+         inst = emit(MOV(dst_null_d(), src_reg(temp)));
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+         break;
+      }
+
      default:
 	 unreachable("not reached");
      }
@@ -881,11 +897,11 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 {
   ir_expression *expr = ir->condition->as_expression();

-   if (expr) {
-      src_reg op[2];
+   if (expr && expr->operation != ir_binop_ubo_load) {
+      src_reg op[3];
      dst_reg temp;

-      assert(expr->get_num_operands() <= 2);
+      assert(expr->get_num_operands() <= 3);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
 	 expr->operands[i]->accept(this);
 	 op[i] = this->result;
@@ -945,6 +961,20 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
 	 emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
 	 return;

+      case ir_triop_csel: {
+         /* Expand the boolean condition into the flag register. */
+         vec4_instruction *inst = emit(MOV(dst_null_d(), op[0]));
+         inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+         /* Select which boolean to return. */
+         dst_reg temp(this, expr->operands[1]->type);
+         inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+         emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
+         return;
+      }
+
      default:
 	 unreachable("not reached");
      }
@@ -993,10 +1023,10 @@ vec4_visitor::visit(ir_variable *ir)
       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
       * variables, so no need for them to be in variable_ht.
       *
-       * Atomic counters take no uniform storage, no need to do
-       * anything here.
+       * Some uniforms, such as samplers and atomic counters, have no actual
+       * storage, so we should ignore them.
       */
-      if (ir->is_in_uniform_block() || ir->type->contains_atomic())
+      if (ir->is_in_uniform_block() || type_size(ir->type) == 0)
         return;

      /* Track how big the whole uniform variable is, in case we need to put a
@@ -1135,6 +1165,13 @@ vec4_visitor::try_emit_mad(ir_expression *ir)
 bool
 vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
 {
+   /* This optimization relies on CMP setting the destination to 0 when
+    * false.  Early hardware only sets the least significant bit, and
+    * leaves the other bits undefined.  So we can't use it.
+    */
+   if (brw->gen < 6)
+      return false;
+
   ir_expression *const cmp = ir->operands[0]->as_expression();

   if (cmp == NULL)
@@ -2589,7 +2626,7 @@ vec4_visitor::visit(ir_texture *ir)
      } else if (ir->op == ir_txf_ms) {
         emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type, WRITEMASK_X),
                  sample_index));
-         if (brw->gen >= 7)
+         if (brw->gen >= 7) {
            /* MCS data is in the first channel of `mcs`, but we need to get it into
             * the .y channel of the second vec4 of params, so replicate .x across
             * the whole vec4 and then mask off everything except .y
@@ -2597,6 +2634,7 @@ vec4_visitor::visit(ir_texture *ir)
            mcs.swizzle = BRW_SWIZZLE_XXXX;
            emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type, WRITEMASK_Y),
                     mcs));
+         }
         inst->mlen++;
      } else if (ir->op == ir_txd) {
 	 const glsl_type *type = lod_type;
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -151,14 +151,20 @@ vec4_vs_visitor::make_reg_for_system_value(ir_variable *ir)
    * it VERT_ATTRIB_MAX, which setup_attributes() picks up on.
    */
   dst_reg *reg = new(mem_ctx) dst_reg(ATTR, VERT_ATTRIB_MAX);
-   vs_prog_data->uses_vertexid = true;

   switch (ir->data.location) {
-   case SYSTEM_VALUE_VERTEX_ID:
+   case SYSTEM_VALUE_BASE_VERTEX:
      reg->writemask = WRITEMASK_X;
+      vs_prog_data->uses_vertexid = true;
+      break;
+   case SYSTEM_VALUE_VERTEX_ID:
+   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
+      reg->writemask = WRITEMASK_Z;
+      vs_prog_data->uses_vertexid = true;
      break;
   case SYSTEM_VALUE_INSTANCE_ID:
-      reg->writemask = WRITEMASK_Y;
+      reg->writemask = WRITEMASK_W;
+      vs_prog_data->uses_instanceid = true;
      break;
   default:
      unreachable("not reached");
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -98,6 +98,37 @@ upload_clip_state(struct brw_context *brw)

   dw2 |= GEN6_CLIP_GB_TEST;

+   /* We need to disable guardband clipping if the guardband (which we always
+    * program to the maximum screen-space bounding box of 8K x 8K) will be
+    * smaller than the viewport.
+    *
+    * Closely examining the clip determination formulas in the documentation
+    * reveals that objects will be discarded entirely if they're outside the
+    * (small) guardband, even if they're within the (large) viewport:
+    *
+    *     TR = TR_GB || TR_VPXY || TR_VPZ || TR_UC || TR_NEGW
+    *     TA = !TR && TA_GB && TA_VPZ && TA_NEGW
+    *     MC = !(TA || TR)
+    *
+    * (TA is "Trivial Accept", TR is "Trivial Reject", MC is "Must Clip".)
+    *
+    * Disabling guardband clipping removes the TR_GB condition, which means
+    * they'll be considered MC ("Must Clip") unless they're rejected for
+    * some other reason.
+    *
+    * Note that there is no TA_VPXY condition.  If there were, objects entirely
+    * inside a 16384x16384 viewport would be trivially accepted, breaking the
+    * "objects must have a screenspace bounding box not exceeding 8K in the X
+    * or Y direction" restriction.  Instead, they're clipped.
+    */
+   for (unsigned i = 0; i < ctx->Const.MaxViewports; i++) {
+      if (ctx->ViewportArray[i].Width > 8192 ||
+          ctx->ViewportArray[i].Height > 8192) {
+         dw2 &= ~GEN6_CLIP_GB_TEST;
+         break;
+      }
+   }
+
   /* If the viewport dimensions are smaller than the drawable dimensions,
    * we have to disable guardband clipping prior to Gen8.  We always program
    * the guardband to a fixed size, which is almost always larger than the
--- a/src/mesa/drivers/dri/i965/gen6_queryobj.c
+++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c
@@ -84,11 +84,16 @@ brw_store_register_mem64(struct brw_context *brw,

 static void
 write_primitives_generated(struct brw_context *brw,
-                           drm_intel_bo *query_bo, int idx)
+                           drm_intel_bo *query_bo, int stream, int idx)
 {
   intel_batchbuffer_emit_mi_flush(brw);

-   brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
+   if (brw->gen >= 7 && stream > 0) {
+      brw_store_register_mem64(brw, query_bo,
+                               GEN7_SO_PRIM_STORAGE_NEEDED(stream), idx);
+   } else {
+      brw_store_register_mem64(brw, query_bo, CL_INVOCATION_COUNT, idx);
+   }
 }

 static void
@@ -239,7 +244,7 @@ gen6_begin_query(struct gl_context *ctx, struct gl_query_object *q)
      break;

   case GL_PRIMITIVES_GENERATED:
-      write_primitives_generated(brw, query->bo, 0);
+      write_primitives_generated(brw, query->bo, query->Base.Stream, 0);
      break;

   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
@@ -277,7 +282,7 @@ gen6_end_query(struct gl_context *ctx, struct gl_query_object *q)
      break;

   case GL_PRIMITIVES_GENERATED:
-      write_primitives_generated(brw, query->bo, 1);
+      write_primitives_generated(brw, query->bo, query->Base.Stream, 1);
      break;

   case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -41,8 +41,9 @@ gen8_emit_vertices(struct brw_context *brw)
   struct gl_context *ctx = &brw->ctx;

   brw_prepare_vertices(brw);
+   brw_prepare_shader_draw_parameters(brw);

-   if (brw->vs.prog_data->uses_vertexid) {
+   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
@@ -52,14 +53,28 @@ gen8_emit_vertices(struct brw_context *brw)
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");

+      unsigned dw1 = 0;
+      if (brw->vs.prog_data->uses_vertexid) {
+         dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID |
+                (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |  /* .z channel */
+                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT);
+      }
+
+      if (brw->vs.prog_data->uses_instanceid) {
+         dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID |
+                (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
+                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT);
+      }
+
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
-      OUT_BATCH(GEN8_SGVS_ENABLE_VERTEX_ID |
-                (0 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) |   /* .x channel */
-                (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT) |
-                GEN8_SGVS_ENABLE_INSTANCE_ID |
-                (1 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .y channel */
-                (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT));
+      OUT_BATCH(dw1);
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
+      OUT_BATCH(brw->vb.nr_buffers | GEN8_VF_INSTANCING_ENABLE);
+      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(2);
@@ -91,11 +106,12 @@ gen8_emit_vertices(struct brw_context *brw)
   }

   /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
-   if (brw->vb.nr_buffers) {
-      assert(brw->vb.nr_buffers <= 33);
+   unsigned nr_buffers = brw->vb.nr_buffers + brw->vs.prog_data->uses_vertexid;
+   if (nr_buffers) {
+      assert(nr_buffers <= 33);

-      BEGIN_BATCH(1 + 4*brw->vb.nr_buffers);
-      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4*brw->vb.nr_buffers - 1));
+      BEGIN_BATCH(1 + 4 * nr_buffers);
+      OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
      for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
         struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
         uint32_t dw0 = 0;
@@ -109,10 +125,19 @@ gen8_emit_vertices(struct brw_context *brw)
         OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
         OUT_BATCH(buffer->bo->size);
      }
+
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
+                   GEN7_VB0_ADDRESS_MODIFYENABLE |
+                   BDW_MOCS_WB << 16);
+         OUT_RELOC64(brw->draw.draw_params_bo, I915_GEM_DOMAIN_VERTEX, 0,
+                     brw->draw.draw_params_offset);
+         OUT_BATCH(brw->draw.draw_params_bo->size);
+      }
      ADVANCE_BATCH();
   }

-   unsigned nr_elements = brw->vb.nr_enabled;
+   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
@@ -180,6 +205,16 @@ gen8_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }
+
+   if (brw->vs.prog_data->uses_vertexid) {
+      OUT_BATCH(GEN6_VE0_VALID |
+                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
   ADVANCE_BATCH();

   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
--- a/src/mesa/drivers/dri/i965/intel_copy_image.c
+++ b/src/mesa/drivers/dri/i965/intel_copy_image.c
@@ -40,6 +40,7 @@ copy_image_with_blitter(struct brw_context *brw,
                        int src_width, int src_height)
 {
   GLuint bw, bh;
+   uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
   int cpp;

   /* The blitter doesn't understand multisampling at all. */
@@ -70,43 +71,53 @@ copy_image_with_blitter(struct brw_context *brw,
      return false;
   }

+   intel_miptree_get_image_offset(src_mt, src_level, src_z,
+                                  &src_image_x, &src_image_y);
+
   if (_mesa_is_format_compressed(src_mt->format)) {
      _mesa_get_format_block_size(src_mt->format, &bw, &bh);

      assert(src_x % bw == 0);
-      assert(src_y % bw == 0);
+      assert(src_y % bh == 0);
      assert(src_width % bw == 0);
-      assert(src_height % bw == 0);
+      assert(src_height % bh == 0);

      src_x /= (int)bw;
-      src_y /= (int)bw;
+      src_y /= (int)bh;
      src_width /= (int)bw;
-      src_height /= (int)bw;
+      src_height /= (int)bh;
+
+      /* Inside of the miptree, the x offsets are stored in pixels while
+       * the y offsets are stored in blocks.  We need to scale just the x
+       * offset.
+       */
+      src_image_x /= bw;

      cpp = _mesa_get_format_bytes(src_mt->format);
   } else {
      cpp = src_mt->cpp;
   }
+   src_x += src_image_x;
+   src_y += src_image_y;
+
+   intel_miptree_get_image_offset(dst_mt, dst_level, dst_z,
+                                  &dst_image_x, &dst_image_y);

   if (_mesa_is_format_compressed(dst_mt->format)) {
      _mesa_get_format_block_size(dst_mt->format, &bw, &bh);

      assert(dst_x % bw == 0);
-      assert(dst_y % bw == 0);
+      assert(dst_y % bh == 0);

      dst_x /= (int)bw;
-      dst_y /= (int)bw;
+      dst_y /= (int)bh;
+
+      /* Inside of the miptree, the x offsets are stored in pixels while
+       * the y offsets are stored in blocks.  We need to scale just the x
+       * offset.
+       */
+      dst_image_x /= bw;
   }
-
-   uint32_t src_image_x, src_image_y;
-   intel_miptree_get_image_offset(src_mt, src_level, src_z,
-                                  &src_image_x, &src_image_y);
-   src_x += src_image_x;
-   src_y += src_image_y;
-
-   uint32_t dst_image_x, dst_image_y;
-   intel_miptree_get_image_offset(dst_mt, dst_level, dst_z,
-                                  &dst_image_x, &dst_image_y);
   dst_x += dst_image_x;
   dst_y += dst_image_y;

@@ -243,9 +254,11 @@ intel_copy_image_sub_data(struct gl_context *ctx,
   intel_miptree_all_slices_resolve_depth(brw, intel_dst_image->mt);
   intel_miptree_resolve_color(brw, intel_dst_image->mt);

-   if (copy_image_with_blitter(brw, intel_src_image->mt, src_image->Level,
+   unsigned src_level = src_image->Level + src_image->TexObject->MinLevel;
+   unsigned dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
+   if (copy_image_with_blitter(brw, intel_src_image->mt, src_level,
                               src_x, src_y, src_z,
-                               intel_dst_image->mt, src_image->Level,
+                               intel_dst_image->mt, dst_level,
                               dst_x, dst_y, dst_z,
                               src_width, src_height))
      return;
@@ -253,9 +266,9 @@ intel_copy_image_sub_data(struct gl_context *ctx,
   /* This is a worst-case scenario software fallback that maps the two
    * textures and does a memcpy between them.
    */
-   copy_image_with_memcpy(brw, intel_src_image->mt, src_image->Level,
+   copy_image_with_memcpy(brw, intel_src_image->mt, src_level,
                          src_x, src_y, src_z,
-                          intel_dst_image->mt, src_image->Level,
+                          intel_dst_image->mt, dst_level,
                          dst_x, dst_y, dst_z,
                          src_width, src_height);
 }
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1677,8 +1677,8 @@ intel_miptree_updownsample(struct brw_context *brw,
 {
   if (brw->gen < 8) {
      brw_blorp_blit_miptrees(brw,
-                              src, 0 /* level */, 0 /* layer */,
-                              dst, 0 /* level */, 0 /* layer */,
+                              src, 0 /* level */, 0 /* layer */, src->format,
+                              dst, 0 /* level */, 0 /* layer */, dst->format,
                              0, 0,
                              src->logical_width0, src->logical_height0,
                              0, 0,
@@ -1698,7 +1698,9 @@ intel_miptree_updownsample(struct brw_context *brw,

      brw_blorp_blit_miptrees(brw,
                              src->stencil_mt, 0 /* level */, 0 /* layer */,
+                              src->stencil_mt->format,
                              dst->stencil_mt, 0 /* level */, 0 /* layer */,
+                              dst->stencil_mt->format,
                              0, 0,
                              src->logical_width0, src->logical_height0,
                              0, 0,
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -217,12 +217,18 @@ static struct intel_image_format intel_image_formats[] = {
   { __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },

+   { __DRI_IMAGE_FOURCC_ABGR8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR8888, 4 } } },
+
   { __DRI_IMAGE_FOURCC_SARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_SARGB8, 4 } } },

   { __DRI_IMAGE_FOURCC_XRGB8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB8888, 4 }, } },

+   { __DRI_IMAGE_FOURCC_XBGR8888, __DRI_IMAGE_COMPONENTS_RGB, 1,
+     { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR8888, 4 }, } },
+
   { __DRI_IMAGE_FOURCC_RGB565, __DRI_IMAGE_COMPONENTS_RGB, 1,
     { { 0, 0, 0, __DRI_IMAGE_FORMAT_RGB565, 2 } } },

--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -241,8 +241,5 @@ TEST_F(register_coalesce_test, test_channel_mul_grf)

   register_coalesce(v);

-   /* This path isn't supported yet in the reswizzling code, so we're checking
-    * that we haven't done anything bad to scalar non-DP[234]s.
-    */
-   EXPECT_NE(mul->dst.reg, to.reg);
+   EXPECT_EQ(mul->dst.reg, to.reg);
 }
--- a/src/mesa/drivers/dri/nouveau/nouveau_render_t.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_render_t.c
@@ -56,21 +56,21 @@
 */
 #define EMIT_VBO(out, ctx, start, delta, n) do {			\
 		struct nouveau_render_state *render = to_render_state(ctx); \
-		int npush = n;						\
+		int _npush = n;						\
 									\
-		while (npush) {						\
-			int npack = MIN2(npush, MAX_PACKET * MAX_OUT_##out); \
-			npush -= npack;					\
+		while (_npush) {						\
+			int _npack = MIN2(_npush, MAX_PACKET * MAX_OUT_##out); \
+			_npush -= _npack;					\
 									\
-			BATCH_PACKET_##out((npack + MAX_OUT_##out - 1)	\
+			BATCH_PACKET_##out((_npack + MAX_OUT_##out - 1)	\
 					   / MAX_OUT_##out);		\
-			while (npack) {					\
-				int nout = MIN2(npack, MAX_OUT_##out);	\
-				npack -= nout;				\
+			while (_npack) {				\
+				int _nout = MIN2(_npack, MAX_OUT_##out);\
+				_npack -= _nout;			\
 									\
 				OUT_INDICES_##out(render, start, delta, \
-						  nout);		\
-				start += nout;				\
+						  _nout);		\
+				start += _nout;				\
 			}						\
 		}							\
 	} while (0)
--- a/src/mesa/drivers/dri/nouveau/nv04_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_fb.c
@@ -93,6 +93,7 @@ nv04_emit_framebuffer(struct gl_context *ctx, int emit)

 	/* Recompute the scissor state. */
 	context_dirty(ctx, SCISSOR);
+	context_dirty(ctx, CONTROL);
 }

 void
--- a/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
@@ -123,6 +123,7 @@ void
 nv04_emit_control(struct gl_context *ctx, int emit)
 {
 	struct nv04_context *nv04 = to_nv04_context(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
 	int cull = ctx->Polygon.CullFaceMode;
 	int front = ctx->Polygon.FrontFace;

@@ -146,9 +147,9 @@ nv04_emit_control(struct gl_context *ctx, int emit)
 				 NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CCW;

 	/* Depth test. */
-	if (ctx->Depth.Test)
+	if (ctx->Depth.Test && fb->Visual.depthBits > 0)
 		nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE;
-	if (ctx->Depth.Mask)
+	if (ctx->Depth.Mask && fb->Visual.depthBits > 0)
 		nv04->ctrl[0] |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE;

 	nv04->ctrl[0] |= get_comparison_op(ctx->Depth.Func) << 16;
@@ -174,7 +175,7 @@ nv04_emit_control(struct gl_context *ctx, int emit)
 	if (ctx->Stencil.WriteMask[0])
 		nv04->ctrl[0] |= NV04_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE;

-	if (ctx->Stencil.Enabled)
+	if (ctx->Stencil._Enabled)
 		nv04->ctrl[1] |= NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_ENABLE;

 	nv04->ctrl[1] |= get_comparison_op(ctx->Stencil.Function[0]) << 4 |
--- a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
@@ -152,6 +152,7 @@ nv10_emit_framebuffer(struct gl_context *ctx, int emit)

 	context_dirty(ctx, VIEWPORT);
 	context_dirty(ctx, SCISSOR);
+	context_dirty(ctx, DEPTH);
 }

 void
--- a/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
@@ -97,11 +97,12 @@ void
 nv10_emit_depth(struct gl_context *ctx, int emit)
 {
 	struct nouveau_pushbuf *push = context_push(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;

 	BEGIN_NV04(push, NV10_3D(DEPTH_TEST_ENABLE), 1);
-	PUSH_DATAb(push, ctx->Depth.Test);
+	PUSH_DATAb(push, ctx->Depth.Test && fb->Visual.depthBits > 0);
 	BEGIN_NV04(push, NV10_3D(DEPTH_WRITE_ENABLE), 1);
-	PUSH_DATAb(push, ctx->Depth.Mask);
+	PUSH_DATAb(push, ctx->Depth.Mask && fb->Visual.depthBits > 0);
 	BEGIN_NV04(push, NV10_3D(DEPTH_FUNC), 1);
 	PUSH_DATA (push, nvgl_comparison_op(ctx->Depth.Func));
 }
@@ -144,7 +145,7 @@ nv10_emit_stencil_func(struct gl_context *ctx, int emit)
 	struct nouveau_pushbuf *push = context_push(ctx);

 	BEGIN_NV04(push, NV10_3D(STENCIL_ENABLE), 1);
-	PUSH_DATAb(push, ctx->Stencil.Enabled);
+	PUSH_DATAb(push, ctx->Stencil._Enabled);

 	BEGIN_NV04(push, NV10_3D(STENCIL_FUNC_FUNC), 3);
 	PUSH_DATA (push, nvgl_comparison_op(ctx->Stencil.Function[0]));
--- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
@@ -128,6 +128,7 @@ nv20_emit_framebuffer(struct gl_context *ctx, int emit)
 	/* Recompute the viewport/scissor state. */
 	context_dirty(ctx, VIEWPORT);
 	context_dirty(ctx, SCISSOR);
+	context_dirty(ctx, DEPTH);
 }

 void
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1488,6 +1488,10 @@ copy_array_attrib(struct gl_context *ctx,

   /* skip ArrayBufferObj */
   /* skip IndexBufferObj */
+
+   /* Invalidate draw state. It will be updated during the next draw. */
+   dest->DrawMethod = DRAW_NONE;
+   dest->_DrawArrays = NULL;
 }

 /**
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -622,6 +622,12 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api)
   consts->MaxProgramMatrices = MAX_PROGRAM_MATRICES;
   consts->MaxProgramMatrixStackDepth = MAX_PROGRAM_MATRIX_STACK_DEPTH;

+   /* Assume that, if GLSL 1.30+ (or GLSL ES 3.00+) is supported,
+    * gl_VertexID is implemented using a native hardware register with OpenGL
+    * semantics.
+    */
+   consts->VertexID_is_zero_based = false;
+
   /* CheckArrayBounds is overriden by drivers/x11 for X server */
   consts->CheckArrayBounds = GL_FALSE;

@@ -653,6 +659,9 @@ _mesa_init_constants(struct gl_constants *consts, gl_api api)
   /* GL_ARB_framebuffer_object */
   consts->MaxSamples = 0;

+   /* GLSL default if NativeIntegers == FALSE */
+   consts->UniformBooleanTrue = FLT_AS_UINT(1.0f);
+
   /* GL_ARB_sync */
   consts->MaxServerWaitTimeout = 0x1fff7fffffffULL;

--- a/src/mesa/main/macros.h
+++ b/src/mesa/main/macros.h
@@ -184,6 +184,13 @@ static inline GLfloat UINT_AS_FLT(GLuint u)
   return tmp.f;
 }

+static inline unsigned FLT_AS_UINT(float f)
+{
+   fi_type tmp;
+   tmp.f = f;
+   return tmp.u;
+}
+
 /**
 * Convert a floating point value to an unsigned fixed point value.
 *
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2055,8 +2055,90 @@ typedef enum
    * \name Vertex shader system values
    */
   /*@{*/
+   /**
+    * OpenGL-style vertex ID.
+    *
+    * Section 2.11.7 (Shader Execution), subsection Shader Inputs, of the
+    * OpenGL 3.3 core profile spec says:
+    *
+    *     "gl_VertexID holds the integer index i implicitly passed by
+    *     DrawArrays or one of the other drawing commands defined in section
+    *     2.8.3."
+    *
+    * Section 2.8.3 (Drawing Commands) of the same spec says:
+    *
+    *     "The commands....are equivalent to the commands with the same base
+    *     name (without the BaseVertex suffix), except that the ith element
+    *     transferred by the corresponding draw call will be taken from
+    *     element indices[i] + basevertex of each enabled array."
+    *
+    * Additionally, the overview in the GL_ARB_shader_draw_parameters spec
+    * says:
+    *
+    *     "In unextended GL, vertex shaders have inputs named gl_VertexID and
+    *     gl_InstanceID, which contain, respectively the index of the vertex
+    *     and instance. The value of gl_VertexID is the implicitly passed
+    *     index of the vertex being processed, which includes the value of
+    *     baseVertex, for those commands that accept it."
+    *
+    * gl_VertexID gets basevertex added in.  This differs from DirectX where
+    * SV_VertexID does \b not get basevertex added in.
+    *
+    * \note
+    * If all system values are available, \c SYSTEM_VALUE_VERTEX_ID will be
+    * equal to \c SYSTEM_VALUE_VERTEX_ID_ZERO_BASE plus
+    * \c SYSTEM_VALUE_BASE_VERTEX.
+    *
+    * \sa SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, SYSTEM_VALUE_BASE_VERTEX
+    */
   SYSTEM_VALUE_VERTEX_ID,
+
+   /**
+    * Instance ID as supplied to gl_InstanceID
+    *
+    * Values assigned to gl_InstanceID always begin with zero, regardless of
+    * the value of baseinstance.
+    *
+    * Section 11.1.3.9 (Shader Inputs) of the OpenGL 4.4 core profile spec
+    * says:
+    *
+    *     "gl_InstanceID holds the integer instance number of the current
+    *     primitive in an instanced draw call (see section 10.5)."
+    *
+    * Through a big chain of pseudocode, section 10.5 describes that
+    * baseinstance is not counted by gl_InstanceID.  In that section, notice
+    *
+    *     "If an enabled vertex attribute array is instanced (it has a
+    *     non-zero divisor as specified by VertexAttribDivisor), the element
+    *     index that is transferred to the GL, for all vertices, is given by
+    *
+    *         floor(instance/divisor) + baseinstance
+    *
+    *     If an array corresponding to an attribute required by a vertex
+    *     shader is not enabled, then the corresponding element is taken from
+    *     the current attribute state (see section 10.2)."
+    *
+    * Note that baseinstance is \b not included in the value of instance.
+    */
   SYSTEM_VALUE_INSTANCE_ID,
+
+   /**
+    * DirectX-style vertex ID.
+    *
+    * Unlike \c SYSTEM_VALUE_VERTEX_ID, this system value does \b not include
+    * the value of basevertex.
+    *
+    * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_BASE_VERTEX
+    */
+   SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
+
+   /**
+    * Value of \c basevertex passed to \c glDrawElementsBaseVertex and similar
+    * functions.
+    *
+    * \sa SYSTEM_VALUE_VERTEX_ID, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
+    */
+   SYSTEM_VALUE_BASE_VERTEX,
   /*@}*/

   /**
@@ -3394,6 +3476,15 @@ struct gl_constants
    */
   GLboolean NativeIntegers;

+   /**
+    * Does VertexID count from zero or from base vertex?
+    *
+    * \note
+    * If desktop GLSL 1.30 or GLSL ES 3.00 are not supported, this field is
+    * ignored and need not be set.
+    */
+   bool VertexID_is_zero_based;
+
   /**
    * If the driver supports real 32-bit integers, what integer value should be
    * used for boolean true in uniform uploads?  (Usually 1 or ~0.)
--- a/src/mesa/main/pipelineobj.c
+++ b/src/mesa/main/pipelineobj.c
@@ -120,12 +120,12 @@ delete_pipelineobj_cb(GLuint id, void *data, void *userData)
 void
 _mesa_free_pipeline_data(struct gl_context *ctx)
 {
+   _mesa_reference_pipeline_object(ctx, &ctx->_Shader, NULL);
+
   _mesa_HashDeleteAll(ctx->Pipeline.Objects, delete_pipelineobj_cb, ctx);
   _mesa_DeleteHashTable(ctx->Pipeline.Objects);

-   _mesa_reference_pipeline_object(ctx, &ctx->_Shader, NULL);
   _mesa_delete_pipeline_object(ctx, ctx->Pipeline.Default);
-
 }

 /**
--- a/src/mesa/main/shader_query.cpp
+++ b/src/mesa/main/shader_query.cpp
@@ -92,8 +92,9 @@ is_active_attrib(const ir_variable *var)
       * are enumerated, including the special built-in inputs gl_VertexID
       * and gl_InstanceID."
       */
-      return !strcmp(var->name, "gl_VertexID") ||
-             !strcmp(var->name, "gl_InstanceID");
+      return var->data.location == SYSTEM_VALUE_VERTEX_ID ||
+             var->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE ||
+             var->data.location == SYSTEM_VALUE_INSTANCE_ID;

   default:
      return false;
@@ -133,7 +134,18 @@ _mesa_GetActiveAttrib(GLhandleARB program, GLuint desired_index,
         continue;

      if (current_index == desired_index) {
-	 _mesa_copy_string(name, maxLength, length, var->name);
+         const char *var_name = var->name;
+
+         /* Since gl_VertexID may be lowered to gl_VertexIDMESA, we need to
+          * consider gl_VertexIDMESA as gl_VertexID for purposes of checking
+          * active attributes.
+          */
+         if (var->data.mode == ir_var_system_value &&
+             var->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
+            var_name = "gl_VertexID";
+         }
+
+	 _mesa_copy_string(name, maxLength, length, var_name);

 	 if (size)
 	    *size = (var->type->is_array()) ? var->type->length : 1;
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -34,6 +34,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
+#include "util/u_math.h"

 #include "st_context.h"
 #include "st_extensions.h"
@@ -274,8 +275,6 @@ void st_init_limits(struct pipe_screen *screen,
   c->MinProgramTextureGatherOffset = screen->get_param(screen, PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET);
   c->MaxProgramTextureGatherOffset = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET);

-   c->UniformBooleanTrue = ~0;
-
   c->MaxTransformFeedbackBuffers =
      screen->get_param(screen, PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS);
   c->MaxTransformFeedbackBuffers = MIN2(c->MaxTransformFeedbackBuffers, MAX_FEEDBACK_BUFFERS);
@@ -618,7 +617,6 @@ void st_init_extensions(struct pipe_screen *screen,
   extensions->NV_fog_distance = GL_TRUE;
   extensions->NV_texture_env_combine4 = GL_TRUE;
   extensions->NV_texture_rectangle = GL_TRUE;
-   extensions->NV_vdpau_interop = GL_TRUE;

   extensions->OES_EGL_image = GL_TRUE;
   extensions->OES_EGL_image_external = GL_TRUE;
@@ -697,6 +695,8 @@ void st_init_extensions(struct pipe_screen *screen,
      }
   }

+   consts->UniformBooleanTrue = consts->NativeIntegers ? ~0 : fui(1.0f);
+
   /* Below are the cases which cannot be moved into tables easily. */

   if (!has_lib_dxtc && !options->force_s3tc_enable) {
@@ -881,4 +881,11 @@ void st_init_extensions(struct pipe_screen *screen,
                                   PIPE_BIND_SAMPLER_VIEW)) {
      extensions->ARB_ES3_compatibility = GL_TRUE;
   }
+
+   if (screen->get_video_param &&
+       screen->get_video_param(screen, PIPE_VIDEO_PROFILE_UNKNOWN,
+                               PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+                               PIPE_VIDEO_CAP_SUPPORTS_INTERLACED)) {
+      extensions->NV_vdpau_interop = GL_TRUE;
+   }
 }
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -74,14 +74,6 @@ extern "C" {
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

-/**
- * Maximum number of temporary registers.
- *
- * It is too big for stack allocated arrays -- it will cause stack overflow on
- * Windows and likely Mac OS X.
- */
-#define MAX_TEMPS         4096
-
 /**
 * Maximum number of arrays
 */
@@ -3301,14 +3293,10 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
 void
 glsl_to_tgsi_visitor::simplify_cmp(void)
 {
-   unsigned *tempWrites;
+   int tempWritesSize = 0;
+   unsigned *tempWrites = NULL;
   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];

-   tempWrites = new unsigned[MAX_TEMPS];
-   if (!tempWrites) {
-      return;
-   }
-   memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
   memset(outputWrites, 0, sizeof(outputWrites));

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
@@ -3330,7 +3318,19 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
         prevWriteMask = outputWrites[inst->dst.index];
         outputWrites[inst->dst.index] |= inst->dst.writemask;
      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
-         assert(inst->dst.index < MAX_TEMPS);
+         if (inst->dst.index >= tempWritesSize) {
+            /* Grow in 4096-entry steps until dst.index itself is covered. */
+            const int size = (inst->dst.index / 4096 + 1) * 4096;
+            unsigned *p = (unsigned*) realloc(tempWrites, size * sizeof(*p));
+            if (!p) {
+               free(tempWrites); /* a failed realloc leaves the old block */
+               return;
+            }
+            memset(p + tempWritesSize, 0, (size - tempWritesSize) * sizeof(*p));
+            tempWrites = p;
+            tempWritesSize = size;
+         }
+
         prevWriteMask = tempWrites[inst->dst.index];
         tempWrites[inst->dst.index] |= inst->dst.writemask;
      } else
@@ -3349,7 +3349,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
      }
   }

-   delete [] tempWrites;
+   free(tempWrites);
 }

 /* Replaces all references to a temporary register index with another index. */
@@ -4158,7 +4158,9 @@ struct label {
 struct st_translate {
   struct ureg_program *ureg;

-   struct ureg_dst temps[MAX_TEMPS];
+   unsigned temps_size;
+   struct ureg_dst *temps;
+
   struct ureg_dst arrays[MAX_ARRAYS];
   struct ureg_src *constants;
   struct ureg_src *immediates;
@@ -4200,6 +4202,8 @@ const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
    */
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID,
+   0,
+   0,

   /* Geometry shader
    */
@@ -4299,7 +4303,19 @@ dst_register(struct st_translate *t,
      return ureg_dst_undef();

   case PROGRAM_TEMPORARY:
-      assert(index < Elements(t->temps));
+      /* Allocate space for temporaries on demand. */
+      if (index >= t->temps_size) {
+         /* Grow in 4096-entry steps until index itself is covered. */
+         const unsigned size = (index / 4096 + 1) * 4096;
+         struct ureg_dst *p = (struct ureg_dst*)
+                              realloc(t->temps, size * sizeof(*p));
+         /* On failure t->temps is left untouched, so it stays usable. */
+         if (!p)
+            return ureg_dst_undef();
+         memset(p + t->temps_size, 0, (size - t->temps_size) * sizeof(*p));
+         t->temps = p;
+         t->temps_size = size;
+      }

      if (ureg_dst_is_undef(t->temps[index]))
         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
@@ -5158,6 +5174,7 @@ st_translate_program(

 out:
   if (t) {
+      free(t->temps);
      free(t->insn);
      free(t->labels);
      free(t->constants);