mesa: automake: include mesa_glinterop.h in distfile

Add mesa_glinterop.h to the list of headers that will get included in the distfile as it is required to build Mesa itself. Corrects a regression introduced in a89faa2022. Signed-off-by: Jonathan Gray <jsg@jsg.id.au> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> (cherry picked from commit 23392abf50)
Update version to 12.0.4
2016-11-10 21:57:37 +00:00 · 2016-11-10 21:03:41 +00:00 · 2016-11-08 20:45:03 +00:00 · 2016-11-08 20:45:03 +00:00 · 2016-11-08 20:45:03 +00:00 · 2016-11-08 20:44:27 +00:00
246 changed files with 4310 additions and 1998 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,7 @@
 language: c

-sudo: false
+sudo: true
+dist: trusty

 cache:
  directories:
@@ -15,7 +16,11 @@ addons:
      - libexpat1-dev
      - libxcb-dri2-0-dev
      - libx11-xcb-dev
-      - llvm-3.4-dev
+      - llvm-3.5-dev
+      # llvm-config is not in the dev package?
+      - llvm-3.5
+      # LLVM packaging is broken and misses this dep.
+      - libedit-dev
      - scons

 env:
@@ -41,6 +46,16 @@ install:
  - export PATH="/usr/lib/ccache:$PATH"
  - pip install --user mako

+  # Since libdrm gets updated in configure.ac regularly, try to pick up the
+  # latest version from there.
+  - for line in `grep "^LIBDRM_.*_REQUIRED=" configure.ac`; do
+      old_ver=`echo $LIBDRM_VERSION | sed 's/libdrm-//'`;
+      new_ver=`echo $line | sed 's/.*REQUIRED=//'`;
+      if `echo "$old_ver,$new_ver" | tr ',' '\n' | sort -Vc 2> /dev/null`; then
+        export LIBDRM_VERSION="libdrm-$new_ver";
+      fi;
+    done
+
  # Install dependencies where we require specific versions (or where
  # disallowed by Travis CI's package whitelisting).

@@ -78,22 +93,19 @@ install:

  - wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
  - tar -jxvf $LIBDRM_VERSION.tar.bz2
-  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix && make install)
+  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && make install)

  - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
  - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
  - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)

-# Disabled LLVM (and therefore r300 and r600) because the build fails
-# with "undefined reference to `clock_gettime'" and "undefined
-# reference to `setupterm'" in llvmpipe.
 script:
  - if test "x$BUILD" = xmake; then
      ./autogen.sh --enable-debug
-        --disable-gallium-llvm
        --with-egl-platforms=x11,drm
        --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
-        --with-gallium-drivers=svga,swrast,vc4,virgl
+        --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600
+        --disable-llvm-shared-libs
        ;
      make && make check;
    elif test x$BUILD = xscons; then
--- a/Makefile.am
+++ b/Makefile.am
@@ -62,6 +62,7 @@ noinst_HEADERS = \
 	include/c99_math.h \
 	include/c11 \
 	include/D3D9 \
+	include/GL/wglext.h \
 	include/HaikuGL \
 	include/no_extern_c.h \
 	include/pci_ids
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-12.0.1
+12.0.4
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -37,6 +37,8 @@ cache:
 - win_flex_bison-2.4.5.zip
 - llvm-3.3.1-msvc2013-mtd.7z

+os: Visual Studio 2013
+
 environment:
  WINFLEXBISON_ARCHIVE: win_flex_bison-2.4.5.zip
  LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
@@ -47,11 +49,13 @@ install:
 - python -m pip --version
 # Install Mako
 - python -m pip install --egg Mako
+# Install pywin32 extensions, needed by SCons
+- python -m pip install pypiwin32
 # Install SCons
 - python -m pip install --egg scons==2.4.1
 - scons --version
 # Install flex/bison
-- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "http://downloads.sourceforge.net/project/winflexbison/%WINFLEXBISON_ARCHIVE%"
+- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "https://downloads.sourceforge.net/project/winflexbison/old_versions/%WINFLEXBISON_ARCHIVE%"
 - 7z x -y -owinflexbison\ "%WINFLEXBISON_ARCHIVE%" > nul
 - set Path=%CD%\winflexbison;%Path%
 - win_flex --version
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -1,2 +1,25 @@
 # The offending commit that this patch (part) reverts isn't in 12.0
 be32a2132785fbc119f17e62070e007ee7d17af7 i965/compiler: Bring back the INTEL_PRECISE_TRIG environment variable
+
+# The patch depends on the batch_cache work at least.
+89f00f749fda4c1beca38f362c7f86bdc6e32785 a4xx: make sure to actually clamp depth as requested
+
+# The patch depends on the 'generic' interpolation and location
+# implementation introduced with 2d6dd30a9b30
+114874b22beafb2d07006b197c62d717fc7f80cc i965/fs: Use sample interpolation for interpolateAtCentroid in persample mode
+
+# VAAPI encode landed after the branch point.
+a5993022275c20061ac025d9adc26c5f9d02afee st/va Avoid VBR bitrate calculation overflow v2
+
+# EGL_KHR_debug landed after the branch point.
+17084b6f9340f798111e53e08f5d35c7630cee48 egl: Fix missing unlock in eglGetSyncAttribKHR
+
+# Depends on update_renderbuffer_read_surfaces at least
+f2b9b0c730e345bcffa9eadabb25af3ab02642f2 i965: Add missing BRW_NEW_FS_PROG_DATA to render target reads.
+
+# The commit in question hasn't landed in branch
+1ef787339774bc7f1cc9c1615722f944005e070c Revert "egl/android: Set EGL_MAX_PBUFFER_WIDTH and EGL_MAX_PBUFFER_HEIGHT"
+
+# Patches depend on the fence_finish() gallium API change and corresponding driver work
+f240ad98bc05281ea7013d91973cb5f932ae9434 st/mesa: unduplicate st_check_sync code
+b687f766fddb7b39479cd9ee0427984029ea3559 st/mesa: allow multiple concurrent waiters in ClientWaitSync
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*12\.0.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -225,6 +225,7 @@ AX_GCC_FUNC_ATTRIBUTE([packed])
 AX_GCC_FUNC_ATTRIBUTE([pure])
 AX_GCC_FUNC_ATTRIBUTE([returns_nonnull])
 AX_GCC_FUNC_ATTRIBUTE([unused])
+AX_GCC_FUNC_ATTRIBUTE([visibility])
 AX_GCC_FUNC_ATTRIBUTE([warn_unused_result])
 AX_GCC_FUNC_ATTRIBUTE([weak])

@@ -783,6 +784,7 @@ if test "x$enable_asm" = xyes; then
    esac
 fi

+AC_HEADER_MAJOR
 AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
 AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
 AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
@@ -1639,7 +1641,7 @@ esac

 AC_ARG_WITH([vulkan-icddir],
    [AS_HELP_STRING([--with-vulkan-icddir=DIR],
-        [directory for the Vulkan driver icd files @<:@${sysconfdir}/vulkan/icd.d@:>@])],
+        [directory for the Vulkan driver icd files @<:@${datarootdir}/vulkan/icd.d@:>@])],
    [VULKAN_ICD_INSTALL_DIR="$withval"],
    [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
 AC_SUBST([VULKAN_ICD_INSTALL_DIR])
@@ -1997,8 +1999,8 @@ if test "x$with_egl_platforms" != "x" -a "x$enable_egl" != xyes; then
    AC_MSG_ERROR([cannot build egl state tracker without EGL library])
 fi

-PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland_scanner],
-        WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland_scanner`,
+PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
+        WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
        WAYLAND_SCANNER='')
 if test "x$WAYLAND_SCANNER" = x; then
    AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner])
@@ -2182,6 +2184,10 @@ if test "x$enable_gallium_llvm" = xyes; then

        LLVM_COMPONENTS="engine bitwriter mcjit mcdisassembler"

+        if $LLVM_CONFIG --components | grep -q inteljitevents ; then
+            LLVM_COMPONENTS="${LLVM_COMPONENTS} inteljitevents"
+        fi
+
        if test "x$enable_opencl" = xyes; then
            llvm_check_version_for "3" "5" "0" "opencl"

@@ -2331,6 +2337,45 @@ swr_llvm_check() {
    fi
 }

+swr_require_cxx_feature_flags() {
+    feature_name="$1"
+    preprocessor_test="$2"
+    option_list="$3"
+    output_var="$4"
+
+    AC_MSG_CHECKING([whether $CXX supports $feature_name])
+    AC_LANG_PUSH([C++])
+    save_CXXFLAGS="$CXXFLAGS"
+    save_IFS="$IFS"
+    IFS=","
+    found=0
+    for opts in $option_list
+    do
+        unset IFS
+        CXXFLAGS="$opts $save_CXXFLAGS"
+        AC_COMPILE_IFELSE(
+            [AC_LANG_PROGRAM(
+                [   #if !($preprocessor_test)
+                    #error
+                    #endif
+                ])],
+            [found=1; break],
+            [])
+        IFS=","
+    done
+    IFS="$save_IFS"
+    CXXFLAGS="$save_CXXFLAGS"
+    AC_LANG_POP([C++])
+    if test $found -eq 1; then
+        AC_MSG_RESULT([$opts])
+        eval "$output_var=\$opts"
+        return 0
+    fi
+    AC_MSG_RESULT([no])
+    AC_MSG_ERROR([swr requires $feature_name support])
+    return 1
+}
+
 dnl Duplicates in GALLIUM_DRIVERS_DIRS are removed by sorting it after this block
 if test -n "$with_gallium_drivers"; then
    gallium_drivers=`IFS=', '; echo $with_gallium_drivers`
@@ -2400,29 +2445,20 @@ if test -n "$with_gallium_drivers"; then
        xswr)
            swr_llvm_check "swr"

-            AC_MSG_CHECKING([whether $CXX supports c++11/AVX/AVX2])
-            AVX_CXXFLAGS="-march=core-avx-i"
-            AVX2_CXXFLAGS="-march=core-avx2"
+            swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
+                ",-std=c++11" \
+                SWR_CXX11_CXXFLAGS
+            AC_SUBST([SWR_CXX11_CXXFLAGS])

-            AC_LANG_PUSH([C++])
-            save_CXXFLAGS="$CXXFLAGS"
-            CXXFLAGS="-std=c++11 $CXXFLAGS"
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
-                              [AC_MSG_ERROR([c++11 compiler support not detected])])
-            CXXFLAGS="$save_CXXFLAGS"
+            swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
+                ",-mavx,-march=core-avx" \
+                SWR_AVX_CXXFLAGS
+            AC_SUBST([SWR_AVX_CXXFLAGS])

-            save_CXXFLAGS="$CXXFLAGS"
-            CXXFLAGS="$AVX_CXXFLAGS $CXXFLAGS"
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
-                              [AC_MSG_ERROR([AVX compiler support not detected])])
-            CXXFLAGS="$save_CXXFLAGS"
-
-            save_CFLAGS="$CXXFLAGS"
-            CXXFLAGS="$AVX2_CXXFLAGS $CXXFLAGS"
-            AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],[],
-                              [AC_MSG_ERROR([AVX2 compiler support not detected])])
-            CXXFLAGS="$save_CXXFLAGS"
-            AC_LANG_POP([C++])
+            swr_require_cxx_feature_flags "AVX2" "defined(__AVX2__)" \
+                ",-mavx2 -mfma -mbmi2 -mf16c,-march=core-avx2" \
+                SWR_AVX2_CXXFLAGS
+            AC_SUBST([SWR_AVX2_CXXFLAGS])

            HAVE_GALLIUM_SWR=yes
            ;;
@@ -2560,6 +2596,8 @@ fi
 AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
 AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_OSMESA, test "x$enable_gallium_osmesa" = xyes)
+AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" = xyes -o \
+                                        "x$enable_gallium_osmesa" = xyes)

 AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64)
 AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
--- a/docs/relnotes/12.0.1.html
+++ b/docs/relnotes/12.0.1.html
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+28dff9c045f4305c96a875a487b9f06c7e88d910511cd6016dbddcd1f53ade0d  mesa-12.0.1.tar.gz
+bab24fb79f78c876073527f515ed871fc9c81d816f66c8a0b051d8d653896389  mesa-12.0.1.tar.xz
 </pre>


--- a/docs/relnotes/12.0.2.html
+++ b/docs/relnotes/12.0.2.html
@@ -0,0 +1,403 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.2 Release Notes / September 2, 2016</h1>
+
+<p>
+Mesa 12.0.2 is a bug fix release which fixes bugs found since the 12.0.1 release.
+</p>
+<p>
+Mesa 12.0.2 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+a08565ab1273751ebe2ffa928cbf785056594c803077c9719d0763da780f2918  mesa-12.0.2.tar.gz
+d957a5cc371dcd7ff2aa0d87492f263aece46f79352f4520039b58b1f32552cb  mesa-12.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regression due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96381">Bug 96381</a> - Texture artifacts with immutable texture storage and mipmaps</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97567">Bug 97567</a> - [SNB, ILK] ctl, piglit regressions in mesa 12.0.2rc1</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>configure.ac: Use ${datarootdir} for --with-vulkan-icddir help string too</li>
+</ul>
+
+<p>Bernard Kilarski (1):</p>
+<ul>
+  <li>glx: fix error code when there is no context bound</li>
+</ul>
+
+<p>Brian Paul (4):</p>
+<ul>
+  <li>svga: handle mismatched number of samplers, sampler views</li>
+  <li>mesa: use _mesa_clear_texture_image() in clear_texture_fields()</li>
+  <li>swrast: fix incorrectly positioned putImage() in swrast driver</li>
+  <li>mesa: fix format conversion bug in get_tex_rgba_uncompressed()</li>
+</ul>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965: Fix miptree layout for EGLImage-based renderbuffers</li>
+  <li>i965: Respect miptree offsets in intel_readpixels_tiled_memcpy()</li>
+</ul>
+
+<p>Christian König (1):</p>
+<ul>
+  <li>st/mesa: fix reference counting bug in st_vdpau</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>swr: Refactor checks for compiler feature flags</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: Fix fixed function spot lighting on newer hardware (again)</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>anv: fix writemask on blit fragment shader.</li>
+  <li>st/glsl_to_tgsi: fix st_src_reg_for_double constant.</li>
+</ul>
+
+<p>Emil Velikov (15):</p>
+<ul>
+  <li>docs: add sha256 checksums for 12.0.1</li>
+  <li>mesa: automake: list builddir before srcdir</li>
+  <li>mesa: scons: list builddir before srcdir</li>
+  <li>i965: store reference to the context within struct brw_fence (v2)</li>
+  <li>anv: remove internal 'validate' layer</li>
+  <li>anv: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+  <li>anv: automake: build with -Bsymbolic</li>
+  <li>anv: do not export the Vulkan API</li>
+  <li>anv: remove dummy VK_DEBUG_MARKER_EXT entry points</li>
+  <li>isl: automake: use VISIBILITY_CFLAGS to restrict symbol visibility</li>
+  <li>cherry-ignore: temporary(?) drop "a4xx: make sure to actually clamp depth"</li>
+  <li>i915: Check return value of screen-&gt;image.loader-&gt;getBuffers</li>
+  <li>Revert "i965/miptree: Set logical_depth0 == 6 for cube maps"</li>
+  <li>glx/glvnd: list the strcmp arguments in correct order</li>
+  <li>Update version to 12.0.2</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>vc4: Close our screen's fd on screen close.</li>
+  <li>vc4: Disable early Z with computed depth.</li>
+  <li>vc4: Fix a leak of the src[] array of VPM reads in optimization.</li>
+  <li>vc4: Fix leak of the bo_handles table.</li>
+</ul>
+
+<p>Francisco Jerez (3):</p>
+<ul>
+  <li>i965: Emit SKL VF cache invalidation W/A from brw_emit_pipe_control_flush.</li>
+  <li>i965: Make room in the batch epilogue for three more pipe controls.</li>
+  <li>i965: Fix remaining flush vs invalidate race conditions in brw_emit_pipe_control_flush.</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>platform_android: prevent deadlock in droid_swap_buffers</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>mesa: Strip arrayness from interface block names in some IO validation</li>
+  <li>glsl: Pack integer and double varyings as flat even if interpolation mode is none</li>
+  <li>glcpp: Track the actual version instead of just the version_resolved flag</li>
+  <li>glcpp: Only disallow #undef of pre-defined macros on GLSL ES &gt;= 3.00 shaders</li>
+  <li>glsl: Mark cube map array sampler types as reserved in GLSL ES 3.10</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+  <li>mesa: etc2 online compression is unsupported, don't attempt it</li>
+  <li>st/mesa: return appropriate mesa format for ETC texture formats</li>
+  <li>mesa: set _NEW_BUFFERS when updating texture bound to current buffers</li>
+  <li>nv50,nvc0: srgb rendering is only available for rgba/bgra</li>
+  <li>vbo: allow DrawElementsBaseVertex in display lists</li>
+  <li>gallium/util: add helper to compute zmin/zmax for a viewport state</li>
+  <li>nv50,nvc0: fix depth range when halfz is enabled</li>
+  <li>nv50/ir: fix bb positions after exit instructions</li>
+  <li>vbo: add basevertex when looking up elements for vbo splitting</li>
+  <li>a4xx: only disable depth clipping, not all clipping, when requested</li>
+  <li>nv50/ir: make sure cfg iterator always hits all blocks</li>
+  <li>main: add missing EXTRA_END in OES_sample_variables get check</li>
+  <li>nouveau: always enable at least one RC</li>
+  <li>nv30: only bail on color/depth bpp mismatch when surfaces are swizzled</li>
+  <li>a4xx: make sure to actually clamp depth as requested</li>
+  <li>gk110/ir: fix quadop dall emission</li>
+</ul>
+
+<p>Jan Ziak (2):</p>
+<ul>
+  <li>egl/x11: avoid using freed memory if dri2 init fails</li>
+  <li>loader: fix memory leak in loader_dri3_open</li>
+</ul>
+
+<p>Jason Ekstrand (31):</p>
+<ul>
+  <li>nir/spirv: Don't multiply the push constant block size by 4</li>
+  <li>anv: Add a stub for CmdCopyQueryPoolResults on Ivy Bridge</li>
+  <li>glsl/types: Fix function type comparison function</li>
+  <li>glsl/types: Use _mesa_hash_data for hashing function types</li>
+  <li>genxml: Make gen6-7 blending look more like gen8</li>
+  <li>anv/pipeline: Unify blend state setup between gen7 and gen8</li>
+  <li>anv: Enable independentBlend on gen7</li>
+  <li>anv: Add an align_down_npot_u32 helper</li>
+  <li>anv: Handle VK_WHOLE_SIZE properly for buffer views</li>
+  <li>i965/miptree: Enforce that height == 1 for 1-D array textures</li>
+  <li>i965/miptree: Set logical_depth0 == 6 for cube maps</li>
+  <li>nir: Add a nir_deref_foreach_leaf helper</li>
+  <li>nir/inline: Constant-initialize local variables in the callee if needed</li>
+  <li>anv/pipeline: Set up point coord enables</li>
+  <li>i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations</li>
+  <li>i965/vec4: Make opt_vector_float reset at the top of each block</li>
+  <li>anv/blit2d: Add a format parameter to bind_dst and create_iview</li>
+  <li>anv/blit2d: Add support for RGB destinations</li>
+  <li>anv/clear: Make cmd_clear_image take an actual VkClearValue</li>
+  <li>anv/clear: Clear E5B9G9R9 images as R32_UINT</li>
+  <li>anv: Include the pipeline layout in the shader hash</li>
+  <li>isl: Allow multisampled array textures</li>
+  <li>anv/descriptor_set: memset anv_descriptor_set_layout</li>
+  <li>anv/pipeline: Fix bind maps for fragment output arrays</li>
+  <li>anv/allocator: Correctly set the number of buckets</li>
+  <li>anv/pipeline: Properly handle OOM during shader compilation</li>
+  <li>anv: Remove unused fields from anv_pipeline_bind_map</li>
+  <li>anv: Add pipeline_has_stage guards a few places</li>
+  <li>anv: Add a struct for storing a compiled shader</li>
+  <li>anv/pipeline: Add support for caching the push constant map</li>
+  <li>anv: Rework pipeline caching</li>
+</ul>
+
+<p>José Fonseca (2):</p>
+<ul>
+  <li>appveyor: Install pywin32 extensions.</li>
+  <li>appveyor: Force Visual Studio 2013 image.</li>
+</ul>
+
+<p>Kenneth Graunke (21):</p>
+<ul>
+  <li>genxml: Add CLIPMODE_* prefix to 3DSTATE_CLIP's "Clip Mode" enum values.</li>
+  <li>genxml: Add APIMODE_D3D missing enum values and improve consistency.</li>
+  <li>anv: Fix near plane clipping on Gen7/7.5.</li>
+  <li>anv: Enable early culling on Gen7.</li>
+  <li>anv: Unify 3DSTATE_CLIP code across generations.</li>
+  <li>genxml: Rename "API Rendering Disable" to "Rendering Disable".</li>
+  <li>anv: Properly call gen75_emit_state_base_address on Haswell.</li>
+  <li>i965: Include VUE handles for GS with invocations &gt; 1.</li>
+  <li>nir: Add a base const_index to shared atomic intrinsics.</li>
+  <li>i965: Fix shared atomic intrinsics to pay attention to base.</li>
+  <li>mesa: Add GL_BGRA_EXT to the list of GenerateMipmap internal formats.</li>
+  <li>mesa: Don't call GenerateMipmap if Width or Height == 0.</li>
+  <li>glsl: Delete bogus ir_set_program_inouts assert.</li>
+  <li>glsl: Fix the program resource names of gl_TessLevelOuter/Inner[].</li>
+  <li>glsl: Fix location bias for patch variables.</li>
+  <li>glsl: Fix invariant matching in GLSL 4.30 and GLSL ES 1.00.</li>
+  <li>mesa: Fix uf10_to_f32() scale factor in the E == 0 and M != 0 case.</li>
+  <li>nir/builder: Add bany_inequal and bany helpers.</li>
+  <li>i965: Implement the WaPreventHSTessLevelsInterference workaround.</li>
+  <li>i965: Fix execution size of scalar TCS barrier setup code.</li>
+  <li>i965: Fix barrier count shift in scalar TCS backend.</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>st/omx/enc: check uninitialized list from task release</li>
+  <li>vl/dri3: fix a memory leak from front buffer</li>
+</ul>
+
+<p>Marek Olšák (7):</p>
+<ul>
+  <li>glsl_to_tgsi: don't use the negate modifier in integer ops after bitcast</li>
+  <li>radeonsi: add a workaround for a compute VGPR-usage LLVM bug</li>
+  <li>winsys/amdgpu: disallow DCC with mipmaps</li>
+  <li>gallium/util: fix align64</li>
+  <li>radeonsi: only set dual source blending for MRT0</li>
+  <li>radeonsi: fix VM faults due NULL internal const buffers on CIK</li>
+  <li>radeonsi: disable SDMA texture copying on Carrizo</li>
+</ul>
+
+<p>Matt Turner (4):</p>
+<ul>
+  <li>mapi: Massage code to allow clang to compile.</li>
+  <li>i965/vec4: Ignore swizzle of VGRF for use by var_range_end().</li>
+  <li>mesa: Use AC_HEADER_MAJOR to include correct header for major().</li>
+  <li>nir: Walk blocks in source code order in lower_vars_to_ssa.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>glx: Don't use current context in __glXSendError</li>
+</ul>
+
+<p>Miklós Máté (1):</p>
+<ul>
+  <li>vbo: set draw_id</li>
+</ul>
+
+<p>Nanley Chery (5):</p>
+<ul>
+  <li>anv/descriptor_set: Fix binding partly undefined descriptor sets</li>
+  <li>isl: Fix assert on raw buffer surface state size</li>
+  <li>anv/device: Fix max buffer range limits</li>
+  <li>isl: Fix isl_tiling_is_any_y()</li>
+  <li>anv/gen7_pipeline: Set PixelShaderKillPixel for discards</li>
+</ul>
+
+<p>Nicolai Hähnle (7):</p>
+<ul>
+  <li>radeonsi: explicitly choose center locations for 1xAA on Polaris</li>
+  <li>radeonsi: fix Polaris MSAA regression</li>
+  <li>radeonsi: ensure sample locations are set for line and polygon smoothing</li>
+  <li>st_glsl_to_tgsi: only skip over slots of an input array that are present</li>
+  <li>glsl: fix optimization of discard nested multiple levels</li>
+  <li>radeonsi: flush TC L2 cache for indirect draw data</li>
+  <li>radeonsi: add si_set_rw_buffer to be used for internal descriptors</li>
+</ul>
+
+<p>Nicolas Boichat (6):</p>
+<ul>
+  <li>egl/dri2: dri2_make_current: Set EGL error if bindContext fails</li>
+  <li>egl/wayland: Set disp-&gt;DriverData to NULL on error</li>
+  <li>egl/surfaceless: Set disp-&gt;DriverData to NULL on error</li>
+  <li>egl/drm: Set disp-&gt;DriverData to NULL on error</li>
+  <li>egl/android: Set dpy-&gt;DriverData to NULL on error</li>
+  <li>egl/dri2: Add reference count for dri2_egl_display</li>
+</ul>
+
+<p>Rob Herring (3):</p>
+<ul>
+  <li>Android: add missing u_math.h include path for libmesa_isl</li>
+  <li>vc4: fix vc4_resource_from_handle() stride calculation</li>
+  <li>vc4: add hash table look-up for exported dmabufs</li>
+</ul>
+
+<p>Samuel Pitoiset (7):</p>
+<ul>
+  <li>nvc0/ir: fix images indirect access on Fermi</li>
+  <li>nvc0: fix the driver cb size when draw parameters are used</li>
+  <li>gm107/ir: add missing NEG modifier for IADD32I</li>
+  <li>gm107/ir: make use of ADD32I for all immediates</li>
+  <li>nvc0: upload sample locations on GM20x</li>
+  <li>nvc0: invalidate textures/samplers on GK104+</li>
+  <li>nv50/ir: always emit the NDV bit for OP_QUADOP</li>
+</ul>
+
+<p>Stefan Dirsch (1):</p>
+<ul>
+  <li>Avoid overflow in 'last' variable of FindGLXFunction(...)</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+  <li>egl/wayland-egl: Fix for segfault in dri2_wl_destroy_surface.</li>
+</ul>
+
+<p>Tim Rowley (2):</p>
+<ul>
+  <li>Revert "gallium: Force blend color to 16-byte alignment"</li>
+  <li>swr: switch from overriding -march to selecting features</li>
+</ul>
+
+<p>Tomasz Figa (8):</p>
+<ul>
+  <li>gallium/dri: Add shared glapi to LIBADD on Android</li>
+  <li>egl/android: Remove unused variables</li>
+  <li>egl/android: Check return value of dri2_get_dri_config()</li>
+  <li>egl/android: Stop leaking DRI images</li>
+  <li>gallium/winsys/kms: Fix double refcount when importing from prime FD (v2)</li>
+  <li>gallium/winsys/kms: Fully initialize kms_sw_dt at prime import time (v2)</li>
+  <li>gallium/winsys/kms: Move display target handle lookup to separate function</li>
+  <li>gallium/winsys/kms: Look up the GEM handle after importing a prime FD</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/12.0.3.html
+++ b/docs/relnotes/12.0.3.html
@@ -0,0 +1,71 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.3 Release Notes / September 15, 2016</h1>
+
+<p>
+Mesa 12.0.3 is a bug fix release which fixes bugs found since the 12.0.2 release.
+</p>
+<p>
+Mesa 12.0.3 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+79abcfab3de30dbd416d1582a3cf6b1be308466231488775f1b7bb43be353602 mesa-12.0.3.tar.gz
+1dc86dd9b51272eee1fad3df65e18cda2e556ef1bc0b6e07cd750b9757f493b1 mesa-12.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97781">Bug 97781</a> - [HSW, BYT, IVB] es2-cts.gtf.gl2extensiontests.depth_texture_cube_map.depth_texture_cube_map</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 12.0.2</li>
+  <li>Revert "i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations"</li>
+  <li>Update version to 12.0.3</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>appveyor: Update winflexbison download URL.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/GL/mesa_glinterop.h
+++ b/include/GL/mesa_glinterop.h
@@ -58,8 +58,8 @@ extern "C" {
 #endif

 /* Forward declarations to avoid inclusion of GL/glx.h */
-typedef struct _XDisplay Display;
-typedef struct __GLXcontextRec *GLXContext;
+struct _XDisplay;
+struct __GLXcontextRec;

 /* Forward declarations to avoid inclusion of EGL/egl.h */
 typedef void *EGLDisplay;
@@ -246,7 +246,7 @@ struct mesa_glinterop_export_out {
 * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
 */
 int
-MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
+MesaGLInteropGLXQueryDeviceInfo(struct _XDisplay *dpy, struct __GLXcontextRec *context,
                                struct mesa_glinterop_device_info *out);


@@ -271,7 +271,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
 * \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
 */
 int
-MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
+MesaGLInteropGLXExportObject(struct _XDisplay *dpy, struct __GLXcontextRec *context,
                             struct mesa_glinterop_export_in *in,
                             struct mesa_glinterop_export_out *out);

@@ -286,11 +286,11 @@ MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
                             struct mesa_glinterop_export_out *out);


-typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, GLXContext context,
+typedef int (PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(struct _XDisplay *dpy, struct __GLXcontextRec *context,
                                                     struct mesa_glinterop_device_info *out);
 typedef int (PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, EGLContext context,
                                                     struct mesa_glinterop_device_info *out);
-typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext context,
+typedef int (PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(struct _XDisplay *dpy, struct __GLXcontextRec *context,
                                                  struct mesa_glinterop_export_in *in,
                                                  struct mesa_glinterop_export_out *out);
 typedef int (PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext context,
--- a/install-gallium-links.mk
+++ b/install-gallium-links.mk
@@ -13,8 +13,8 @@ all-local : .install-gallium-links
 	fi;							\
 	$(MKDIR_P) $$link_dir;					\
 	file_list="$(dri_LTLIBRARIES:%.la=.libs/%.so)";		\
-	file_list+="$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)";	\
-	file_list+="$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)";	\
+	file_list="$$file_list$(egl_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)";	\
+	file_list="$$file_list$(lib_LTLIBRARIES:%.la=.libs/%.$(LIB_EXT)*)";	\
 	for f in $$file_list; do 				\
 		if test -h .libs/$$f; then			\
 			cp -d $$f $$link_dir;			\
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -47,6 +47,30 @@ CLEANFILES = $(BUILT_SOURCES)

 SUBDIRS = . gtest util mapi/glapi/gen mapi

+if HAVE_OPENGL
+gldir = $(includedir)/GL
+gl_HEADERS = \
+  $(top_srcdir)/include/GL/gl.h \
+  $(top_srcdir)/include/GL/glext.h \
+  $(top_srcdir)/include/GL/glcorearb.h \
+  $(top_srcdir)/include/GL/gl_mangle.h
+endif
+
+if HAVE_GLX
+glxdir = $(includedir)/GL
+glx_HEADERS = \
+  $(top_srcdir)/include/GL/glx.h \
+  $(top_srcdir)/include/GL/glxext.h \
+  $(top_srcdir)/include/GL/glx_mangle.h
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = mesa/gl.pc
+endif
+
+if HAVE_COMMON_OSMESA
+osmesadir = $(includedir)/GL
+osmesa_HEADERS = $(top_srcdir)/include/GL/osmesa.h
+endif
+
 # include only conditionally ?
 SUBDIRS += compiler

@@ -93,7 +117,8 @@ SUBDIRS += gallium
 endif

 EXTRA_DIST = \
-	getopt hgl SConscript
+	getopt hgl SConscript \
+	$(top_srcdir)/include/GL/mesa_glinterop.h

 AM_CFLAGS = $(VISIBILITY_CFLAGS)
 AM_CXXFLAGS = $(VISIBILITY_CXXFLAGS)
--- a/src/compiler/glsl/glcpp/glcpp-parse.y
+++ b/src/compiler/glsl/glcpp/glcpp-parse.y
@@ -278,10 +278,34 @@ control_line_success:
 	HASH_TOKEN DEFINE_TOKEN define
 |	HASH_TOKEN UNDEF IDENTIFIER NEWLINE {
 		macro_t *macro;
-		if (strcmp("__LINE__", $3) == 0
-		    || strcmp("__FILE__", $3) == 0
-		    || strcmp("__VERSION__", $3) == 0
-		    || strncmp("GL_", $3, 3) == 0)
+
+                /* Section 3.4 (Preprocessor) of the GLSL ES 3.00 spec says:
+                 *
+                 *    It is an error to undefine or to redefine a built-in
+                 *    (pre-defined) macro name.
+                 *
+                 * The GLSL ES 1.00 spec does not contain this text.
+                 *
+                 * Section 3.3 (Preprocessor) of the GLSL 1.30 spec says:
+                 *
+                 *    #define and #undef functionality are defined as is
+                 *    standard for C++ preprocessors for macro definitions
+                 *    both with and without macro parameters.
+                 *
+                 * At least as far as I can tell GCC allows '#undef __FILE__'.
+                 * Furthermore, there are desktop OpenGL conformance tests
+                 * that expect '#undef __VERSION__' and '#undef
+                 * GL_core_profile' to work.
+                 *
+                 * Only disallow #undef of pre-defined macros on GLSL ES >=
+                 * 3.00 shaders.
+                 */
+		if (parser->is_gles &&
+                    parser->version >= 300 &&
+                    (strcmp("__LINE__", $3) == 0
+                     || strcmp("__FILE__", $3) == 0
+                     || strcmp("__VERSION__", $3) == 0
+                     || strncmp("GL_", $3, 3) == 0))
 			glcpp_error(& @1, parser, "Built-in (pre-defined)"
 				    " macro names cannot be undefined.");

@@ -396,13 +420,13 @@ control_line_success:
 		_glcpp_parser_skip_stack_pop (parser, & @1);
 	} NEWLINE
 |	HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE {
-		if (parser->version_resolved) {
+		if (parser->version != 0) {
 			glcpp_error(& @1, parser, "#version must appear on the first line");
 		}
 		_glcpp_parser_handle_version_declaration(parser, $3, NULL, true);
 	}
 |	HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE {
-		if (parser->version_resolved) {
+		if (parser->version != 0) {
 			glcpp_error(& @1, parser, "#version must appear on the first line");
 		}
 		_glcpp_parser_handle_version_declaration(parser, $3, $4, true);
@@ -1345,7 +1369,7 @@ glcpp_parser_create(const struct gl_extensions *extensions, gl_api api)

   parser->extensions = extensions;
   parser->api = api;
-   parser->version_resolved = false;
+   parser->version = 0;

   parser->has_new_line_number = 0;
   parser->new_line_number = 1;
@@ -2281,10 +2305,10 @@ _glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t versio
 {
   const struct gl_extensions *extensions = parser->extensions;

-   if (parser->version_resolved)
+   if (parser->version != 0)
      return;

-   parser->version_resolved = true;
+   parser->version = version;

   add_builtin_define (parser, "__VERSION__", version);

--- a/src/compiler/glsl/glcpp/glcpp.h
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -196,7 +196,7 @@ struct glcpp_parser {
 	int error;
 	const struct gl_extensions *extensions;
 	gl_api api;
-	bool version_resolved;
+	unsigned version;
 	bool has_new_line_number;
 	int new_line_number;
 	bool has_new_source_number;
--- a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c
+++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c
@@ -1,3 +1,4 @@
+#version 300 es
 #undef __LINE__
 #undef __FILE__
 #undef __VERSION__
--- a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected
+++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected
@@ -1,6 +1,7 @@
-0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
 0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
 0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+0:4(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined.
+#version 300 es



--- a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c
+++ b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c
@@ -0,0 +1,4 @@
+#version 110
+#undef __LINE__
+#undef __FILE__
+#undef __VERSION__
--- a/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected
+++ b/src/compiler/glsl/glcpp/tests/147-undef-builtin-allowed.c.expected
@@ -0,0 +1,4 @@
+#version 110
+
+
+
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -348,10 +348,10 @@ isampler2DMSArray  KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_mul
 usampler2DMSArray  KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY);

   /* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */
-samplerCubeArray   KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
-isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
-usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
-samplerCubeArrayShadow   KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);
+samplerCubeArray   KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY);
+isamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY);
+usamplerCubeArray KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY);
+samplerCubeArrayShadow   KEYWORD_WITH_ALT(400, 310, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW);

 samplerExternalOES		{
 			  if (yyextra->OES_EGL_image_external_enable)
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1784,8 +1784,10 @@ type_qualifier:
       * variables. As only outputs can be declared as invariant, an invariant
       * output from one shader stage will still match an input of a subsequent
       * stage without the input being declared as invariant."
+       *
+       * On the desktop side, this text first appears in GLSL 4.30.
       */
-      if (state->es_shader && state->language_version >= 300 && $$.flags.q.in)
+      if (state->is_version(430, 300) && $$.flags.q.in)
         _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs");
   }
   | interpolation_qualifier type_qualifier
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -586,6 +586,13 @@ public:
      return this->u.state_slots;
   }

+   inline bool is_interpolation_flat() const
+   { /* Flat if qualified so, or if the type contains integers or doubles. */
+      return this->data.interpolation == INTERP_QUALIFIER_FLAT ||
+             this->type->contains_integer() ||
+             this->type->contains_double();
+   }
+
   inline bool is_name_ralloced() const
   {
      return this->name != ir_variable::tmp_name;
--- a/src/compiler/glsl/ir_hv_accept.cpp
+++ b/src/compiler/glsl/ir_hv_accept.cpp
@@ -147,7 +147,7 @@ ir_expression::accept(ir_hierarchical_visitor *v)
 	 goto done;

      case visit_stop:
-	 return s;
+	 return visit_stop;
      }
   }

--- a/src/compiler/glsl/ir_set_program_inouts.cpp
+++ b/src/compiler/glsl/ir_set_program_inouts.cpp
@@ -260,15 +260,19 @@ ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var,
    * lowering passes (do_vec_index_to_swizzle() gets rid of indexing into
    * vectors, and lower_packed_varyings() gets rid of structs that occur in
    * varyings).
+    *
+    * However, we don't use varying packing in all cases - tessellation
+    * shaders bypass it.  This means we'll see varying structs and arrays
+    * of structs here.  For now, we just give up so the caller marks the
+    * entire variable as used.
    */
   if (!(type->is_matrix() ||
        (type->is_array() &&
         (type->fields.array->is_numeric() ||
          type->fields.array->is_boolean())))) {
-      assert(!"Unexpected indexing in ir_set_program_inouts");

-      /* For safety in release builds, in case we ever encounter unexpected
-       * indexing, give up and let the caller mark the whole variable as used.
+      /* If we don't know how to handle this case, give up and let the
+       * caller mark the whole variable as used.
       */
      return false;
   }
--- a/src/compiler/glsl/link_varyings.cpp
+++ b/src/compiler/glsl/link_varyings.cpp
@@ -308,7 +308,25 @@ cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
      return;
   }

-   if (!prog->IsES && input->data.invariant != output->data.invariant) {
+   /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
+    *
+    *    "As only outputs need be declared with invariant, an output from
+    *     one shader stage will still match an input of a subsequent stage
+    *     without the input being declared as invariant."
+    *
+    * while GLSL 4.20 says:
+    *
+    *    "For variables leaving one shader and coming into another shader,
+    *     the invariant keyword has to be used in both shaders, or a link
+    *     error will result."
+    *
+    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
+    *
+    *    "The invariance of varyings that are declared in both the vertex
+    *     and fragment shaders must match."
+    */
+   if (input->data.invariant != output->data.invariant &&
+       prog->Version < (prog->IsES ? 300 : 430)) {
      linker_error(prog,
                   "%s shader output `%s' %s invariant qualifier, "
                   "but %s shader input %s invariant qualifier\n",
@@ -1610,7 +1628,8 @@ varying_matches::compute_packing_class(const ir_variable *var)
   unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
                            (var->data.patch << 2);
   packing_class *= 4;
-   packing_class += var->data.interpolation;
+   packing_class += var->is_interpolation_flat()
+      ? unsigned(INTERP_QUALIFIER_FLAT) : var->data.interpolation;
   return packing_class;
 }

--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -3687,6 +3687,18 @@ create_shader_variable(struct gl_shader_program *shProg,
   if (in->data.mode == ir_var_system_value &&
       in->data.location == SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) {
      out->name = ralloc_strdup(shProg, "gl_VertexID");
+   } else if ((in->data.mode == ir_var_shader_out &&
+               in->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) ||
+              (in->data.mode == ir_var_system_value &&
+               in->data.location == SYSTEM_VALUE_TESS_LEVEL_OUTER)) {
+      out->name = ralloc_strdup(shProg, "gl_TessLevelOuter");
+      type = glsl_type::get_array_instance(glsl_type::float_type, 4);
+   } else if ((in->data.mode == ir_var_shader_out &&
+               in->data.location == VARYING_SLOT_TESS_LEVEL_INNER) ||
+              (in->data.mode == ir_var_system_value &&
+               in->data.location == SYSTEM_VALUE_TESS_LEVEL_INNER)) {
+      out->name = ralloc_strdup(shProg, "gl_TessLevelInner");
+      type = glsl_type::get_array_instance(glsl_type::float_type, 2);
   } else {
      out->name = ralloc_strdup(shProg, name);
   }
@@ -3839,6 +3851,9 @@ add_interface_variables(struct gl_shader_program *shProg,
         continue;
      };

+      if (var->data.patch)
+         loc_bias = int(VARYING_SLOT_PATCH0);
+
      /* Skip packed varyings, packed varyings are handled separately
       * by add_packed_varyings.
       */
--- a/src/compiler/glsl/lower_packed_varyings.cpp
+++ b/src/compiler/glsl/lower_packed_varyings.cpp
@@ -273,11 +273,11 @@ lower_packed_varyings_visitor::run(struct gl_shader *shader)
         continue;

      /* This lowering pass is only capable of packing floats and ints
-       * together when their interpolation mode is "flat".  Therefore, to be
-       * safe, caller should ensure that integral varyings always use flat
-       * interpolation, even when this is not required by GLSL.
+       * together when their interpolation mode is "flat".  Treat integers as
+       * being flat when the interpolation mode is none.
       */
      assert(var->data.interpolation == INTERP_QUALIFIER_FLAT ||
+             var->data.interpolation == INTERP_QUALIFIER_NONE ||
             !var->type->contains_integer());

      /* Clone the variable for program resource list before
@@ -607,7 +607,7 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
   if (this->packed_varyings[slot] == NULL) {
      char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name);
      const glsl_type *packed_type;
-      if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT)
+      if (unpacked_var->is_interpolation_flat())
         packed_type = glsl_type::ivec4_type;
      else
         packed_type = glsl_type::vec4_type;
@@ -627,7 +627,8 @@ lower_packed_varyings_visitor::get_packed_varying_deref(
      packed_var->data.centroid = unpacked_var->data.centroid;
      packed_var->data.sample = unpacked_var->data.sample;
      packed_var->data.patch = unpacked_var->data.patch;
-      packed_var->data.interpolation = unpacked_var->data.interpolation;
+      packed_var->data.interpolation = packed_type == glsl_type::ivec4_type
+         ? unsigned(INTERP_QUALIFIER_FLAT) : unpacked_var->data.interpolation;
      packed_var->data.location = location;
      packed_var->data.precision = unpacked_var->data.precision;
      packed_var->data.always_active_io = unpacked_var->data.always_active_io;
--- a/src/compiler/glsl/opt_conditional_discard.cpp
+++ b/src/compiler/glsl/opt_conditional_discard.cpp
@@ -72,7 +72,14 @@ opt_conditional_discard_visitor::visit_leave(ir_if *ir)

   /* Move the condition and replace the ir_if with the ir_discard. */
   ir_discard *discard = (ir_discard *) ir->then_instructions.head;
-   discard->condition = ir->condition;
+   if (!discard->condition)
+      discard->condition = ir->condition;
+   else {
+      void *ctx = ralloc_parent(ir);
+      discard->condition = new(ctx) ir_expression(ir_binop_logic_and,
+                                                  ir->condition,
+                                                  discard->condition);
+   }
   ir->replace_with(discard);

   progress = true;
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -1079,7 +1079,7 @@ function_key_compare(const void *a, const void *b)
   const glsl_type *const key2 = (glsl_type *) b;

   if (key1->length != key2->length)
-      return 1;
+      return false;

   return memcmp(key1->fields.parameters, key2->fields.parameters,
                 (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
@@ -1090,20 +1090,8 @@ static uint32_t
 function_key_hash(const void *a)
 {
   const glsl_type *const key = (glsl_type *) a;
-   char hash_key[128];
-   unsigned size = 0;
-
-   size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
-
-   for (unsigned i = 0; i < key->length; i++) {
-      if (size >= sizeof(hash_key))
-	 break;
-
-      size += snprintf(& hash_key[size], sizeof(hash_key) - size,
-		       "%p", (void *) key->fields.structure[i].type);
-   }
-
-   return _mesa_hash_string(hash_key);
+   return _mesa_hash_data(key->fields.parameters,
+                          (key->length + 1) * sizeof(*key->fields.parameters));
 }

 const glsl_type *
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -659,6 +659,122 @@ nir_copy_deref(void *mem_ctx, nir_deref *deref)
   return NULL;
 }

+/* Second step of the recursion: tail is the last link of the stack copy.
+ * Temporary array/struct links are chained onto it for every element or
+ * field until a vector or scalar is reached; cb runs on each such leaf.
+ */
+static bool
+deref_foreach_leaf_build_recur(nir_deref_var *deref, nir_deref *tail,
+                               nir_deref_foreach_leaf_cb cb, void *state)
+{
+   unsigned length;
+   union {
+      nir_deref_array arr;
+      nir_deref_struct str;
+   } tmp;
+   /* Returns false as soon as cb does; true once every leaf was visited. */
+   assert(tail->child == NULL);
+   switch (glsl_get_base_type(tail->type)) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_vector_or_scalar(tail->type))
+         return cb(deref, state);
+      /* Fall Through: not a vector or scalar, so index it like an array */
+
+   case GLSL_TYPE_ARRAY:
+      tmp.arr.deref.deref_type = nir_deref_type_array;
+      tmp.arr.deref.type = glsl_get_array_element(tail->type);
+      tmp.arr.deref_array_type = nir_deref_array_type_direct;
+      tmp.arr.indirect = NIR_SRC_INIT;
+      tail->child = &tmp.arr.deref;
+      /* tmp is reused for every element; only offset and child change. */
+      length = glsl_get_length(tail->type);
+      for (unsigned i = 0; i < length; i++) {
+         tmp.arr.deref.child = NULL;
+         tmp.arr.base_offset = i;
+         if (!deref_foreach_leaf_build_recur(deref, &tmp.arr.deref, cb, state))
+            return false;
+      }
+      return true;
+
+   case GLSL_TYPE_STRUCT:
+      tmp.str.deref.deref_type = nir_deref_type_struct;
+      tail->child = &tmp.str.deref;
+      /* Use the struct member of the union throughout this branch. */
+      length = glsl_get_length(tail->type);
+      for (unsigned i = 0; i < length; i++) {
+         tmp.str.deref.child = NULL;
+         tmp.str.deref.type = glsl_get_struct_field(tail->type, i);
+         tmp.str.index = i;
+         if (!deref_foreach_leaf_build_recur(deref, &tmp.str.deref, cb, state))
+            return false;
+      }
+      return true;
+
+   default:
+      unreachable("Invalid type for dereference");
+   }
+}
+
+/* This is the first step of the foreach_leaf recursion.  In this step we are
+ * walking to the end of the deref chain and making a copy in the stack as we
+ * go.  This is because we don't want to mutate the deref chain that was
+ * passed in by the caller.  The downside is that this deref chain is on the
+ * stack and, if the caller wants to do anything with it, they will have to
+ * make their own copy because this one will go away.
+ */
+static bool
+deref_foreach_leaf_copy_recur(nir_deref_var *deref, nir_deref *tail,
+                              nir_deref_foreach_leaf_cb cb, void *state)
+{
+   union {
+      nir_deref_array arr;
+      nir_deref_struct str;
+   } c;
+
+   if (tail->child) {
+      switch (tail->child->deref_type) {
+      case nir_deref_type_array:
+         c.arr = *nir_deref_as_array(tail->child);
+         tail->child = &c.arr.deref; /* relink to the stack copy */
+         return deref_foreach_leaf_copy_recur(deref, &c.arr.deref, cb, state);
+
+      case nir_deref_type_struct:
+         c.str = *nir_deref_as_struct(tail->child);
+         tail->child = &c.str.deref; /* relink to the stack copy */
+         return deref_foreach_leaf_copy_recur(deref, &c.str.deref, cb, state);
+
+      case nir_deref_type_var:
+      default:
+         unreachable("Invalid deref type for a child");
+      }
+   } else {
+      /* We've gotten to the end of the original deref.  Time to start
+       * building our own derefs.
+       */
+      return deref_foreach_leaf_build_recur(deref, tail, cb, state);
+   }
+}
+
+/**
+ * This function iterates over all of the possible derefs that can be created
+ * with the given deref as the head.  It then calls the provided callback with
+ * a full deref for each one.  Iteration stops at the first callback that
+ * returns false, and that false is returned; otherwise the result is true.
+ *
+ * The deref passed to the callback lives on the stack; copy it to keep it.
+ */
+bool
+nir_deref_foreach_leaf(nir_deref_var *deref,
+                       nir_deref_foreach_leaf_cb cb, void *state)
+{
+   nir_deref_var copy = *deref; /* work on a copy so the caller's head is untouched */
+   return deref_foreach_leaf_copy_recur(&copy, &copy.deref, cb, state);
+}
+
 /* Returns a load_const instruction that represents the constant
 * initializer for the given deref chain.  The caller is responsible for
 * ensuring that there actually is a constant initializer.
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1923,6 +1923,10 @@ nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);

 nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);

+typedef bool (*nir_deref_foreach_leaf_cb)(nir_deref_var *deref, void *state);
+bool nir_deref_foreach_leaf(nir_deref_var *deref,
+                            nir_deref_foreach_leaf_cb cb, void *state);
+
 nir_load_const_instr *
 nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);

--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -317,6 +317,25 @@ nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
   return NULL;
 }

+static inline nir_ssa_def *
+nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
+{ /* Dispatch on src0's component count to the matching inequality builder. */
+   switch (src0->num_components) {
+   case 1: return nir_ine(b, src0, src1);
+   case 2: return nir_bany_inequal2(b, src0, src1);
+   case 3: return nir_bany_inequal3(b, src0, src1);
+   case 4: return nir_bany_inequal4(b, src0, src1);
+   default:
+      unreachable("bad component size");
+   }
+}
+
+static inline nir_ssa_def *
+nir_bany(nir_builder *b, nir_ssa_def *src)
+{ /* Compares src against an immediate integer 0 via nir_bany_inequal(). */
+   return nir_bany_inequal(b, src, nir_imm_int(b, 0));
+}
+
 static inline nir_ssa_def *
 nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
 {
--- a/src/compiler/nir/nir_inline_functions.c
+++ b/src/compiler/nir/nir_inline_functions.c
@@ -25,6 +25,20 @@
 #include "nir_builder.h"
 #include "nir_control_flow.h"

+static bool
+deref_apply_constant_initializer(nir_deref_var *deref, void *state)
+{
+   struct nir_builder *b = state; /* callback state is the nir_builder */
+   /* Build the load for this deref's constant initializer and insert it. */
+   nir_load_const_instr *initializer =
+      nir_deref_get_const_initializer_load(b->shader, deref);
+   nir_builder_instr_insert(b, &initializer->instr);
+   /* Store the loaded value; 0xf is presumably an all-components mask. */
+   nir_store_deref_var(b, deref, &initializer->def, 0xf);
+   /* Always true, so nir_deref_foreach_leaf() keeps visiting leaves. */
+   return true;
+}
+
 static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);

 static void
@@ -174,11 +188,35 @@ inline_functions_block(nir_block *block, nir_builder *b,
      /* Add copies of all in parameters */
      assert(call->num_params == callee_copy->num_params);

+      b->cursor = nir_before_instr(&call->instr);
+
+      /* Before we insert the copy of the function, we need to lower away
+       * constant initializers on local variables.  This is because constant
+       * initializers happen (effectively) at the top of the function and,
+       * since these are about to become locals of the calling function,
+       * initialization will happen at the top of the caller rather than at
+       * the top of the callee.  This isn't usually a problem, but if we are
+       * being inlined inside of a loop, it can result in the variable not
+       * getting re-initialized properly for all loop iterations.
+       */
+      nir_foreach_variable(local, &callee_copy->locals) {
+         if (!local->constant_initializer)
+            continue;
+         /* Stack-allocated head deref that names the whole variable. */
+         nir_deref_var deref;
+         deref.deref.deref_type = nir_deref_type_var;
+         deref.deref.child = NULL;
+         deref.deref.type = local->type;
+         deref.var = local;
+         /* Emit an explicit store of the initializer for every leaf. */
+         nir_deref_foreach_leaf(&deref, deref_apply_constant_initializer, b);
+         /* The initializer is now explicit code; clear it on the variable. */
+         local->constant_initializer = NULL;
+      }
+
      exec_list_append(&b->impl->locals, &callee_copy->locals);
      exec_list_append(&b->impl->registers, &callee_copy->registers);

-      b->cursor = nir_before_instr(&call->instr);
-
      /* We now need to tie the two functions together using the
       * parameters.  There are two ways we do this: One is to turn the
       * parameter into a local variable and do a shadow-copy.  The other
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -41,6 +41,8 @@

 #define ARR(...) { __VA_ARGS__ }

+INTRINSIC(nop, 0, ARR(0), false, 0, 0, 0, xx, xx, xx,
+          NIR_INTRINSIC_CAN_ELIMINATE)

 INTRINSIC(load_var, 0, ARR(0), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
 INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
@@ -266,16 +268,16 @@ INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx,
 *    in shared_atomic_add, etc).
 * 2: For CompSwap only: the second data parameter.
 */
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 1, BASE, xx, xx, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 1, BASE, xx, xx, 0)

 #define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
   INTRINSIC(load_##name, 0, ARR(0), true, components, 0, num_indices, \
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -254,6 +254,9 @@ nir_lower_alu_to_scalar_impl(nir_function_impl *impl)
            lower_alu_instr_scalar(nir_instr_as_alu(instr), &builder);
      }
   }
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
 }

 void
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -471,7 +471,7 @@ lower_copies_to_load_store(struct deref_node *node,
   return true;
 }

-/* Performs variable renaming by doing a DFS of the dominance tree
+/* Performs variable renaming
 *
 * This algorithm is very similar to the one outlined in "Efficiently
 * Computing Static Single Assignment Form and the Control Dependence
@@ -479,133 +479,132 @@ lower_copies_to_load_store(struct deref_node *node,
 * SSA def on the stack per block.
 */
 static bool
-rename_variables_block(nir_block *block, struct lower_variables_state *state)
+rename_variables(struct lower_variables_state *state)
 {
   nir_builder b;
   nir_builder_init(&b, state->impl);

-   nir_foreach_instr_safe(instr, block) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
+   nir_foreach_block(block, state->impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;

-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

-      switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var: {
-         struct deref_node *node =
-            get_deref_node(intrin->variables[0], state);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_load_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);

-         if (node == NULL) {
-            /* If we hit this path then we are referencing an invalid
-             * value.  Most likely, we unrolled something and are
-             * reading past the end of some array.  In any case, this
-             * should result in an undefined value.
-             */
-            nir_ssa_undef_instr *undef =
-               nir_ssa_undef_instr_create(state->shader,
-                                          intrin->num_components,
-                                          intrin->dest.ssa.bit_size);
+            if (node == NULL) {
+               /* If we hit this path then we are referencing an invalid
+                * value.  Most likely, we unrolled something and are
+                * reading past the end of some array.  In any case, this
+                * should result in an undefined value.
+                */
+               nir_ssa_undef_instr *undef =
+                  nir_ssa_undef_instr_create(state->shader,
+                                             intrin->num_components,
+                                             intrin->dest.ssa.bit_size);

-            nir_instr_insert_before(&intrin->instr, &undef->instr);
+               nir_instr_insert_before(&intrin->instr, &undef->instr);
+               nir_instr_remove(&intrin->instr);
+
+               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                        nir_src_for_ssa(&undef->def));
+               continue;
+            }
+
+            if (!node->lower_to_ssa)
+               continue;
+
+            nir_alu_instr *mov = nir_alu_instr_create(state->shader,
+                                                      nir_op_imov);
+            mov->src[0].src = nir_src_for_ssa(
+               nir_phi_builder_value_get_block_def(node->pb_value, block));
+            for (unsigned i = intrin->num_components; i < 4; i++)
+               mov->src[0].swizzle[i] = 0;
+
+            assert(intrin->dest.is_ssa);
+
+            mov->dest.write_mask = (1 << intrin->num_components) - 1;
+            nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                              intrin->num_components,
+                              intrin->dest.ssa.bit_size, NULL);
+
+            nir_instr_insert_before(&intrin->instr, &mov->instr);
            nir_instr_remove(&intrin->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&undef->def));
-            continue;
+                                     nir_src_for_ssa(&mov->dest.dest.ssa));
+            break;
         }

-         if (!node->lower_to_ssa)
-            continue;
+         case nir_intrinsic_store_var: {
+            struct deref_node *node =
+               get_deref_node(intrin->variables[0], state);

-         nir_alu_instr *mov = nir_alu_instr_create(state->shader,
-                                                   nir_op_imov);
-         mov->src[0].src = nir_src_for_ssa(
-            nir_phi_builder_value_get_block_def(node->pb_value, block));
-         for (unsigned i = intrin->num_components; i < 4; i++)
-            mov->src[0].swizzle[i] = 0;
-
-         assert(intrin->dest.is_ssa);
-
-         mov->dest.write_mask = (1 << intrin->num_components) - 1;
-         nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                           intrin->num_components,
-                           intrin->dest.ssa.bit_size, NULL);
-
-         nir_instr_insert_before(&intrin->instr, &mov->instr);
-         nir_instr_remove(&intrin->instr);
-
-         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                  nir_src_for_ssa(&mov->dest.dest.ssa));
-         break;
-      }
-
-      case nir_intrinsic_store_var: {
-         struct deref_node *node =
-            get_deref_node(intrin->variables[0], state);
-
-         if (node == NULL) {
-            /* Probably an out-of-bounds array store.  That should be a
-             * no-op. */
-            nir_instr_remove(&intrin->instr);
-            continue;
-         }
-
-         if (!node->lower_to_ssa)
-            continue;
-
-         assert(intrin->num_components ==
-                glsl_get_vector_elements(node->type));
-
-         assert(intrin->src[0].is_ssa);
-
-         nir_ssa_def *new_def;
-         b.cursor = nir_before_instr(&intrin->instr);
-
-         unsigned wrmask = nir_intrinsic_write_mask(intrin);
-         if (wrmask == (1 << intrin->num_components) - 1) {
-            /* Whole variable store - just copy the source.  Note that
-             * intrin->num_components and intrin->src[0].ssa->num_components
-             * may differ.
-             */
-            unsigned swiz[4];
-            for (unsigned i = 0; i < 4; i++)
-               swiz[i] = i < intrin->num_components ? i : 0;
-
-            new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
-                                  intrin->num_components, false);
-         } else {
-            nir_ssa_def *old_def =
-               nir_phi_builder_value_get_block_def(node->pb_value, block);
-            /* For writemasked store_var intrinsics, we combine the newly
-             * written values with the existing contents of unwritten
-             * channels, creating a new SSA value for the whole vector.
-             */
-            nir_ssa_def *srcs[4];
-            for (unsigned i = 0; i < intrin->num_components; i++) {
-               if (wrmask & (1 << i)) {
-                  srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
-               } else {
-                  srcs[i] = nir_channel(&b, old_def, i);
-               }
+            if (node == NULL) {
+               /* Probably an out-of-bounds array store.  That should be a
+                * no-op. */
+               nir_instr_remove(&intrin->instr);
+               continue;
            }
-            new_def = nir_vec(&b, srcs, intrin->num_components);
+
+            if (!node->lower_to_ssa)
+               continue;
+
+            assert(intrin->num_components ==
+                   glsl_get_vector_elements(node->type));
+
+            assert(intrin->src[0].is_ssa);
+
+            nir_ssa_def *new_def;
+            b.cursor = nir_before_instr(&intrin->instr);
+
+            unsigned wrmask = nir_intrinsic_write_mask(intrin);
+            if (wrmask == (1 << intrin->num_components) - 1) {
+               /* Whole variable store - just copy the source.  Note that
+                * intrin->num_components and intrin->src[0].ssa->num_components
+                * may differ.
+                */
+               unsigned swiz[4];
+               for (unsigned i = 0; i < 4; i++)
+                  swiz[i] = i < intrin->num_components ? i : 0;
+
+               new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz,
+                                     intrin->num_components, false);
+            } else {
+               nir_ssa_def *old_def =
+                  nir_phi_builder_value_get_block_def(node->pb_value, block);
+               /* For writemasked store_var intrinsics, we combine the newly
+                * written values with the existing contents of unwritten
+                * channels, creating a new SSA value for the whole vector.
+                */
+               nir_ssa_def *srcs[4];
+               for (unsigned i = 0; i < intrin->num_components; i++) {
+                  if (wrmask & (1 << i)) {
+                     srcs[i] = nir_channel(&b, intrin->src[0].ssa, i);
+                  } else {
+                     srcs[i] = nir_channel(&b, old_def, i);
+                  }
+               }
+               new_def = nir_vec(&b, srcs, intrin->num_components);
+            }
+
+            assert(new_def->num_components == intrin->num_components);
+
+            nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
+            nir_instr_remove(&intrin->instr);
+            break;
         }

-         assert(new_def->num_components == intrin->num_components);
-
-         nir_phi_builder_value_set_block_def(node->pb_value, block, new_def);
-         nir_instr_remove(&intrin->instr);
-         break;
-      }
-
-      default:
-         break;
+         default:
+            break;
+         }
      }
   }

-   for (unsigned i = 0; i < block->num_dom_children; ++i)
-      rename_variables_block(block->dom_children[i], state);
-
   return true;
 }

@@ -737,7 +736,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
      }
   }

-   rename_variables_block(nir_start_block(impl), &state);
+   rename_variables(&state);

   nir_phi_builder_finish(state.phi_builder);

--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -257,7 +257,7 @@ unpack_4x8("unorm")
 unpack_2x16("half")

 unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
-dst.x = (src0.x & 0xffff) | (src0.y >> 16);
+dst.x = (src0.x & 0xffff) | (src0.y << 16);
 """)

 unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
--- a/src/compiler/nir/nir_phi_builder.h
+++ b/src/compiler/nir/nir_phi_builder.h
@@ -44,7 +44,8 @@
 *         var.pb_val = nir_phi_builder_add_value(pb, var.defs)
 *
 *     // Visit each block.  This needs to visit dominators first;
- *     // nir_for_each_block() will be ok.
+ *     // nir_foreach_block() will be ok.
+ *
 *     foreach block:
 *         foreach instruction:
 *             foreach use of variable var:
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1718,8 +1718,8 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
      break;

   case SpvOpAtomicCompareExchange:
-      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
-      intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
+      intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+      intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
      break;

   case SpvOpAtomicISub:
@@ -1816,8 +1816,8 @@ fill_common_atomic_sources(struct vtn_builder *b, SpvOp opcode,
      break;

   case SpvOpAtomicCompareExchange:
-      src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
-      src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+      src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[8])->def);
+      src[1] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def);
      break;
      /* Fall through */

--- a/src/compiler/spirv/vtn_cfg.c
+++ b/src/compiler/spirv/vtn_cfg.c
@@ -239,12 +239,12 @@ vtn_get_branch_type(struct vtn_block *block,
             swcase->fallthrough == block->switch_case);
      swcase->fallthrough = block->switch_case;
      return vtn_branch_type_switch_fallthrough;
-   } else if (block == switch_break) {
-      return vtn_branch_type_switch_break;
   } else if (block == loop_break) {
      return vtn_branch_type_loop_break;
   } else if (block == loop_cont) {
      return vtn_branch_type_loop_continue;
+   } else if (block == switch_break) {
+      return vtn_branch_type_switch_break;
   } else {
      return vtn_branch_type_none;
   }
@@ -443,6 +443,19 @@ vtn_cfg_walk_blocks(struct vtn_builder *b, struct list_head *cf_list,
            vtn_order_case(swtch, case_block->switch_case);
         }

+         enum vtn_branch_type branch_type =
+            vtn_get_branch_type(break_block, switch_case, NULL,
+                                loop_break, loop_cont);
+
+         if (branch_type != vtn_branch_type_none) {
+            /* It is possible that the break is actually the continue block
+             * for the containing loop.  In this case, we need to bail and let
+             * the loop parsing code handle the continue properly.
+             */
+            assert(branch_type == vtn_branch_type_loop_continue);
+            return;
+         }
+
         block = break_block;
         continue;
      }
@@ -518,7 +531,7 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
      struct vtn_block *pred =
         vtn_value(b, w[i + 1], vtn_value_type_block)->block;

-      b->nb.cursor = nir_after_block_before_jump(pred->end_block);
+      b->nb.cursor = nir_after_instr(&pred->end_nop->instr);

      vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
   }
@@ -576,7 +589,9 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,

         vtn_foreach_instruction(b, block_start, block_end, handler);

-         block->end_block = nir_cursor_current_block(b->nb.cursor);
+         block->end_nop = nir_intrinsic_instr_create(b->nb.shader,
+                                                     nir_intrinsic_nop);
+         nir_builder_instr_insert(&b->nb, &block->end_nop->instr);

         if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
            struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
--- a/src/compiler/spirv/vtn_private.h
+++ b/src/compiler/spirv/vtn_private.h
@@ -149,8 +149,8 @@ struct vtn_block {
   /** Points to the switch case started by this block (if any) */
   struct vtn_case *switch_case;

-   /** The last block in this SPIR-V block. */
-   nir_block *end_block;
+   /** Every block ends in a nop intrinsic so that we can find it again */
+   nir_intrinsic_instr *end_nop;
 };

 struct vtn_function {
--- a/src/compiler/spirv/vtn_variables.c
+++ b/src/compiler/spirv/vtn_variables.c
@@ -889,81 +889,9 @@ vtn_get_builtin_location(struct vtn_builder *b,
 }

 static void
-var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
-                  const struct vtn_decoration *dec, void *void_var)
+apply_var_decoration(struct vtn_builder *b, nir_variable *nir_var,
+                     const struct vtn_decoration *dec)
 {
-   struct vtn_variable *vtn_var = void_var;
-
-   /* Handle decorations that apply to a vtn_variable as a whole */
-   switch (dec->decoration) {
-   case SpvDecorationBinding:
-      vtn_var->binding = dec->literals[0];
-      return;
-   case SpvDecorationDescriptorSet:
-      vtn_var->descriptor_set = dec->literals[0];
-      return;
-   default:
-      break;
-   }
-
-   /* Now we handle decorations that apply to a particular nir_variable */
-   nir_variable *nir_var = vtn_var->var;
-   if (val->value_type == vtn_value_type_access_chain) {
-      assert(val->access_chain->length == 0);
-      assert(val->access_chain->var == void_var);
-      assert(member == -1);
-   } else {
-      assert(val->value_type == vtn_value_type_type);
-      if (member != -1)
-         nir_var = vtn_var->members[member];
-   }
-
-   /* Location is odd in that it can apply in three different cases: To a
-    * non-split variable, to a whole split variable, or to one structure
-    * member of a split variable.
-    */
-   if (dec->decoration == SpvDecorationLocation) {
-      unsigned location = dec->literals[0];
-      bool is_vertex_input;
-      if (b->shader->stage == MESA_SHADER_FRAGMENT &&
-          vtn_var->mode == vtn_variable_mode_output) {
-         is_vertex_input = false;
-         location += FRAG_RESULT_DATA0;
-      } else if (b->shader->stage == MESA_SHADER_VERTEX &&
-                 vtn_var->mode == vtn_variable_mode_input) {
-         is_vertex_input = true;
-         location += VERT_ATTRIB_GENERIC0;
-      } else if (vtn_var->mode == vtn_variable_mode_input ||
-                 vtn_var->mode == vtn_variable_mode_output) {
-         is_vertex_input = false;
-         location += VARYING_SLOT_VAR0;
-      } else {
-         assert(!"Location must be on input or output variable");
-      }
-
-      if (nir_var) {
-         /* This handles the member and lone variable cases */
-         nir_var->data.location = location;
-         nir_var->data.explicit_location = true;
-      } else {
-         /* This handles the structure member case */
-         assert(vtn_var->members);
-         unsigned length =
-            glsl_get_length(glsl_without_array(vtn_var->type->type));
-         for (unsigned i = 0; i < length; i++) {
-            vtn_var->members[i]->data.location = location;
-            vtn_var->members[i]->data.explicit_location = true;
-            location +=
-               glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
-                                          is_vertex_input);
-         }
-      }
-      return;
-   }
-
-   if (nir_var == NULL)
-      return;
-
   switch (dec->decoration) {
   case SpvDecorationRelaxedPrecision:
      break; /* FIXME: Do nothing with this for now. */
@@ -1080,6 +1008,99 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
   }
 }

+static void
+var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
+                  const struct vtn_decoration *dec, void *void_var)
+{
+   struct vtn_variable *vtn_var = void_var;
+
+   /* Handle decorations that apply to a vtn_variable as a whole */
+   switch (dec->decoration) {
+   case SpvDecorationBinding:
+      vtn_var->binding = dec->literals[0];
+      return;
+   case SpvDecorationDescriptorSet:
+      vtn_var->descriptor_set = dec->literals[0];
+      return;
+   default:
+      break;
+   }
+
+   if (val->value_type == vtn_value_type_access_chain) {
+      assert(val->access_chain->length == 0);
+      assert(val->access_chain->var == void_var);
+      assert(member == -1);
+   } else {
+      assert(val->value_type == vtn_value_type_type);
+   }
+
+   /* Location is odd.  If applied to a split structure, we have to walk the
+    * whole thing and accumulate the location.  It's easier to handle as a
+    * special case.
+    */
+   if (dec->decoration == SpvDecorationLocation) {
+      unsigned location = dec->literals[0];
+      bool is_vertex_input;
+      if (b->shader->stage == MESA_SHADER_FRAGMENT &&
+          vtn_var->mode == vtn_variable_mode_output) {
+         is_vertex_input = false;
+         location += FRAG_RESULT_DATA0;
+      } else if (b->shader->stage == MESA_SHADER_VERTEX &&
+                 vtn_var->mode == vtn_variable_mode_input) {
+         is_vertex_input = true;
+         location += VERT_ATTRIB_GENERIC0;
+      } else if (vtn_var->mode == vtn_variable_mode_input ||
+                 vtn_var->mode == vtn_variable_mode_output) {
+         is_vertex_input = false;
+         location += VARYING_SLOT_VAR0;
+      } else {
+         assert(!"Location must be on input or output variable");
+      }
+
+      if (vtn_var->var) {
+         /* This handles the member and lone variable cases */
+         vtn_var->var->data.location = location;
+         vtn_var->var->data.explicit_location = true;
+      } else {
+         /* This handles the structure member case */
+         assert(vtn_var->members);
+         unsigned length =
+            glsl_get_length(glsl_without_array(vtn_var->type->type));
+         for (unsigned i = 0; i < length; i++) {
+            vtn_var->members[i]->data.location = location;
+            vtn_var->members[i]->data.explicit_location = true;
+            location +=
+               glsl_count_attribute_slots(vtn_var->members[i]->interface_type,
+                                          is_vertex_input);
+         }
+      }
+      return;
+   } else {
+      if (vtn_var->var) {
+         assert(member <= 0);
+         apply_var_decoration(b, vtn_var->var, dec);
+      } else if (vtn_var->members) {
+         if (member >= 0) {
+            assert(vtn_var->members);
+            apply_var_decoration(b, vtn_var->members[member], dec);
+         } else {
+            unsigned length =
+               glsl_get_length(glsl_without_array(vtn_var->type->type));
+            for (unsigned i = 0; i < length; i++)
+               apply_var_decoration(b, vtn_var->members[i], dec);
+         }
+      } else {
+         /* A few variables, those with external storage, have no actual
+          * nir_variables associated with them.  Fortunately, all decorations
+          * we care about for those variables are on the type only.
+          */
+         assert(vtn_var->mode == vtn_variable_mode_ubo ||
+                vtn_var->mode == vtn_variable_mode_ssbo ||
+                vtn_var->mode == vtn_variable_mode_push_constant);
+      }
+   }
+}
+
 /* Tries to compute the size of an interface block based on the strides and
 * offsets that are provided to us in the SPIR-V source.
 */
@@ -1173,7 +1194,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
      case SpvStorageClassPushConstant:
         var->mode = vtn_variable_mode_push_constant;
         assert(b->shader->num_uniforms == 0);
-         b->shader->num_uniforms = vtn_type_block_size(var->type) * 4;
+         b->shader->num_uniforms = vtn_type_block_size(var->type);
         break;
      case SpvStorageClassInput:
         var->mode = vtn_variable_mode_input;
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -242,6 +242,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
            return NULL;
         break;

+      case __DRI_ATTRIB_MAX_PBUFFER_WIDTH:
+         _eglSetConfigKey(&base, EGL_MAX_PBUFFER_WIDTH,
+                          _EGL_MAX_PBUFFER_WIDTH);
+         break;
+      case __DRI_ATTRIB_MAX_PBUFFER_HEIGHT:
+         _eglSetConfigKey(&base, EGL_MAX_PBUFFER_HEIGHT,
+                          _EGL_MAX_PBUFFER_HEIGHT);
+         break;
+
      default:
 	 key = dri2_to_egl_attribute_map[attrib];
 	 if (key != 0)
@@ -320,6 +329,15 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
      surface_type &= ~EGL_PIXMAP_BIT;
   }

+   /* No support for pbuffer + MSAA for now.
+    *
+    * XXX TODO: pbuffer + MSAA does not work and causes crashes.
+    * See QT bugreport: https://bugreports.qt.io/browse/QTBUG-47509
+    */
+   if (base.Samples) {
+      surface_type &= ~EGL_PBUFFER_BIT;
+   }
+
   conf->base.SurfaceType |= surface_type;

   return conf;
@@ -757,64 +775,99 @@ dri2_create_screen(_EGLDisplay *disp)

 /**
 * Called via eglInitialize(), GLX_drv->API.Initialize().
+ *
+ * This must be guaranteed to be called exactly once, even if eglInitialize is
+ * called many times (without an eglTerminate in between).
 */
 static EGLBoolean
 dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
 {
+   EGLBoolean ret = EGL_FALSE;
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+   /* In the case where the application calls eglMakeCurrent(context1),
+    * eglTerminate, then eglInitialize again (without a call to eglReleaseThread
+    * or eglMakeCurrent(NULL) before that), dri2_dpy structure is still
+    * initialized, as we need it to be able to free context1 correctly.
+    *
+    * It would probably be safest to forcibly release the display with
+    * dri2_display_release, to make sure the display is reinitialized correctly.
+    * However, the EGL spec states that we need to keep a reference to the
+    * current context (so we cannot call dri2_make_current(NULL)), and therefore
+    * we would leak context1 as we would be missing the old display connection
+    * to free it up correctly.
+    */
+   if (dri2_dpy) {
+      dri2_dpy->ref_count++;
+      return EGL_TRUE;
+   }
+
   /* not until swrast_dri is supported */
   if (disp->Options.UseFallback)
      return EGL_FALSE;

+   /* Nothing to initialize for a test only display */
+   if (disp->Options.TestOnly)
+      return EGL_TRUE;
+
   switch (disp->Platform) {
 #ifdef HAVE_SURFACELESS_PLATFORM
   case _EGL_PLATFORM_SURFACELESS:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_surfaceless(drv, disp);
+      ret = dri2_initialize_surfaceless(drv, disp);
+      break;
 #endif
-
 #ifdef HAVE_X11_PLATFORM
   case _EGL_PLATFORM_X11:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_x11(drv, disp);
+      ret = dri2_initialize_x11(drv, disp);
+      break;
 #endif
-
 #ifdef HAVE_DRM_PLATFORM
   case _EGL_PLATFORM_DRM:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_drm(drv, disp);
+      ret = dri2_initialize_drm(drv, disp);
+      break;
 #endif
 #ifdef HAVE_WAYLAND_PLATFORM
   case _EGL_PLATFORM_WAYLAND:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_wayland(drv, disp);
+      ret = dri2_initialize_wayland(drv, disp);
+      break;
 #endif
 #ifdef HAVE_ANDROID_PLATFORM
   case _EGL_PLATFORM_ANDROID:
-      if (disp->Options.TestOnly)
-         return EGL_TRUE;
-      return dri2_initialize_android(drv, disp);
+      ret = dri2_initialize_android(drv, disp);
+      break;
 #endif
-
   default:
      _eglLog(_EGL_WARNING, "No EGL platform enabled.");
      return EGL_FALSE;
   }
+
+   if (ret) {
+      dri2_dpy = dri2_egl_display(disp);
+
+      if (!dri2_dpy) {
+         return EGL_FALSE;
+      }
+
+      dri2_dpy->ref_count++;
+   }
+
+   return ret;
 }

 /**
- * Called via eglTerminate(), drv->API.Terminate().
+ * Decrement display reference count, and free up display if necessary.
 */
-static EGLBoolean
-dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
-{
+static void
+dri2_display_release(_EGLDisplay *disp) {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   unsigned i;

-   _eglReleaseDisplayResources(drv, disp);
+   assert(dri2_dpy->ref_count > 0);
+   dri2_dpy->ref_count--;
+
+   if (dri2_dpy->ref_count > 0)
+      return;
+
   _eglCleanupDisplay(disp);

   if (dri2_dpy->own_dri_screen)
@@ -869,6 +922,21 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
   }
   free(dri2_dpy);
   disp->DriverData = NULL;
+}
+
+/**
+ * Called via eglTerminate(), drv->API.Terminate().
+ *
+ * This must be guaranteed to be called exactly once, even if eglTerminate is
+ * called many times (without an eglInitialize in between).
+ */
+static EGLBoolean
+dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   /* Release all non-current Context/Surfaces. */
+   _eglReleaseDisplayResources(drv, disp);
+
+   dri2_display_release(disp);

   return EGL_TRUE;
 }
@@ -1188,10 +1256,16 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
   _EGLSurface *tmp_dsurf, *tmp_rsurf;
   __DRIdrawable *ddraw, *rdraw;
   __DRIcontext *cctx;
+   EGLBoolean unbind;
+
+   if (!dri2_dpy)
+      return _eglError(EGL_NOT_INITIALIZED, "eglMakeCurrent");

   /* make new bindings */
-   if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf))
+   if (!_eglBindContext(ctx, dsurf, rsurf, &old_ctx, &old_dsurf, &old_rsurf)) {
+      /* _eglBindContext already sets the EGL error (in _eglCheckMakeCurrent) */
      return EGL_FALSE;
+   }

   /* flush before context switch */
   if (old_ctx && dri2_drv->glFlush)
@@ -1206,14 +1280,21 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
      dri2_dpy->core->unbindContext(old_cctx);
   }

-   if ((cctx == NULL && ddraw == NULL && rdraw == NULL) ||
-       dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
+   unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL);
+
+   if (unbind || dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
      if (old_dsurf)
         drv->API.DestroySurface(drv, disp, old_dsurf);
      if (old_rsurf)
         drv->API.DestroySurface(drv, disp, old_rsurf);
-      if (old_ctx)
+
+      if (!unbind)
+         dri2_dpy->ref_count++;
+      if (old_ctx) {
+         EGLDisplay old_disp = _eglGetDisplayHandle(old_ctx->Resource.Display);
         drv->API.DestroyContext(drv, disp, old_ctx);
+         dri2_display_release(old_disp);
+      }

      return EGL_TRUE;
   } else {
@@ -1231,7 +1312,11 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
      _eglPutSurface(old_rsurf);
      _eglPutContext(old_ctx);

-      return EGL_FALSE;
+      /* dri2_dpy->core->bindContext failed. We cannot tell for sure why, but
+       * setting the error to EGL_BAD_MATCH is surely better than leaving it
+       * as EGL_SUCCESS.
+       */
+      return _eglError(EGL_BAD_MATCH, "eglMakeCurrent");
   }
 }

--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -80,8 +80,6 @@
 #include "eglimage.h"
 #include "eglsync.h"

-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
 struct wl_buffer;

 struct dri2_egl_driver
@@ -177,6 +175,10 @@ struct dri2_egl_display
   const __DRI2interopExtension *interop;
   int                       fd;

+   /* dri2_initialize/dri2_terminate increment/decrement this count, so does
+    * dri2_make_current (tracks if there are active contexts/surfaces). */
+   int                       ref_count;
+
   int                       own_device;
   int                       swap_available;
   int                       invalidate_available;
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -29,6 +29,7 @@

 #include <errno.h>
 #include <dlfcn.h>
+#include <fcntl.h>
 #include <xf86drm.h>

 #if ANDROID_VERSION >= 0x402
@@ -160,8 +161,16 @@ droid_window_dequeue_buffer(struct dri2_egl_surface *dri2_surf)
 }

 static EGLBoolean
-droid_window_enqueue_buffer(struct dri2_egl_surface *dri2_surf)
+droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
 {
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+
+   /* To avoid blocking other EGL calls, release the display mutex before
+    * we queue the buffer to the native window and re-acquire the mutex
+    * once the buffer has been handed over.
+    */
+   mtx_unlock(&disp->Mutex);
+
 #if ANDROID_VERSION >= 0x0402
   /* Queue the buffer without a sync fence. This informs the ANativeWindow
    * that it may access the buffer immediately.
@@ -185,14 +194,21 @@ droid_window_enqueue_buffer(struct dri2_egl_surface *dri2_surf)
   dri2_surf->buffer->common.decRef(&dri2_surf->buffer->common);
   dri2_surf->buffer = NULL;

+   mtx_lock(&disp->Mutex);
+
+   if (dri2_surf->dri_image) {
+      dri2_dpy->image->destroyImage(dri2_surf->dri_image);
+      dri2_surf->dri_image = NULL;
+   }
+
   return EGL_TRUE;
 }

 static void
-droid_window_cancel_buffer(struct dri2_egl_surface *dri2_surf)
+droid_window_cancel_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
 {
   /* no cancel buffer? */
-   droid_window_enqueue_buffer(dri2_surf);
+   droid_window_enqueue_buffer(disp, dri2_surf);
 }

 static __DRIbuffer *
@@ -273,6 +289,8 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,

   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
                                dri2_surf->base.GLColorspace);
+   if (!config)
+      goto cleanup_surface;

   dri2_surf->dri_drawable =
      (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
@@ -325,7 +343,7 @@ droid_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)

   if (dri2_surf->base.Type == EGL_WINDOW_BIT) {
      if (dri2_surf->buffer)
-         droid_window_cancel_buffer(dri2_surf);
+         droid_window_cancel_buffer(disp, dri2_surf);

      dri2_surf->window->common.decRef(&dri2_surf->window->common);
   }
@@ -366,6 +384,9 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
   int fourcc, pitch;
   int offset = 0, fd;

+   if (dri2_surf->dri_image)
+	   return 0;
+
   if (!dri2_surf->buffer)
      return -1;

@@ -424,10 +445,8 @@ droid_image_get_buffers(__DRIdrawable *driDrawable,
 static EGLBoolean
 droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
 {
-   struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
-   _EGLContext *ctx;

   if (dri2_surf->base.Type != EGL_WINDOW_BIT)
      return EGL_TRUE;
@@ -435,7 +454,7 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
   dri2_flush_drawable_for_swapbuffers(disp, draw);

   if (dri2_surf->buffer)
-      droid_window_enqueue_buffer(dri2_surf);
+      droid_window_enqueue_buffer(disp, dri2_surf);

   (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);

@@ -718,7 +737,7 @@ droid_open_device(void)
      fd = -1;
   }

-   return (fd >= 0) ? dup(fd) : -1;
+   return (fd >= 0) ? fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1;
 }

 /* support versions < JellyBean */
@@ -864,6 +883,7 @@ cleanup_device:
   close(dri2_dpy->fd);
 cleanup_display:
   free(dri2_dpy);
+   dpy->DriverData = NULL;

   return _eglError(EGL_NOT_INITIALIZED, err);
 }
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -726,5 +726,6 @@ cleanup:
      close(fd);

   free(dri2_dpy);
+   disp->DriverData = NULL;
   return EGL_FALSE;
 }
--- a/src/egl/drivers/dri2/platform_surfaceless.c
+++ b/src/egl/drivers/dri2/platform_surfaceless.c
@@ -157,6 +157,7 @@ cleanup_driver:
   close(dri2_dpy->fd);
 cleanup_display:
   free(dri2_dpy);
+   disp->DriverData = NULL;

   return _eglError(EGL_NOT_INITIALIZED, err);
 }
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -118,6 +118,13 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
   (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);
 }

+static void
+destroy_window_callback(void *data)
+{
+   struct dri2_egl_surface *dri2_surf = data;
+   dri2_surf->wl_win = NULL;
+}
+
 /**
 * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface().
 */
@@ -159,6 +166,7 @@ dri2_wl_create_surface(_EGLDriver *drv, _EGLDisplay *disp,

   dri2_surf->wl_win->private = dri2_surf;
   dri2_surf->wl_win->resize_callback = resize_callback;
+   dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;

   dri2_surf->base.Width =  -1;
   dri2_surf->base.Height = -1;
@@ -257,8 +265,11 @@ dri2_wl_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
   if (dri2_surf->throttle_callback)
      wl_callback_destroy(dri2_surf->throttle_callback);

-   dri2_surf->wl_win->private = NULL;
-   dri2_surf->wl_win->resize_callback = NULL;
+   if (dri2_surf->wl_win) {
+      dri2_surf->wl_win->private = NULL;
+      dri2_surf->wl_win->resize_callback = NULL;
+      dri2_surf->wl_win->destroy_window_callback = NULL;
+   }

   free(surf);

@@ -1238,6 +1249,7 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
   wl_event_queue_destroy(dri2_dpy->wl_queue);
 cleanup_dpy:
   free(dri2_dpy);
+   disp->DriverData = NULL;

   return EGL_FALSE;
 }
@@ -1883,6 +1895,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
   wl_event_queue_destroy(dri2_dpy->wl_queue);
 cleanup_dpy:
   free(dri2_dpy);
+   disp->DriverData = NULL;

   return EGL_FALSE;
 }
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -1231,6 +1231,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
      xcb_disconnect(dri2_dpy->conn);
 cleanup_dpy:
   free(dri2_dpy);
+   disp->DriverData = NULL;

   return EGL_FALSE;
 }
@@ -1302,15 +1303,13 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
      dri2_dpy->screen = DefaultScreen(dpy);
   }

-   if (xcb_connection_has_error(dri2_dpy->conn)) {
+   if (!dri2_dpy->conn || xcb_connection_has_error(dri2_dpy->conn)) {
      _eglLog(_EGL_WARNING, "DRI3: xcb_connect failed");
      goto cleanup_dpy;
   }

-   if (dri2_dpy->conn) {
-      if (!dri3_x11_connect(dri2_dpy))
-         goto cleanup_conn;
-   }
+   if (!dri3_x11_connect(dri2_dpy))
+      goto cleanup_conn;

   if (!dri2_load_driver_dri3(disp))
      goto cleanup_conn;
@@ -1338,10 +1337,8 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
   disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
 #endif

-   if (dri2_dpy->conn) {
-      if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
-         goto cleanup_configs;
-   }
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
+      goto cleanup_configs;

   dri2_dpy->loader_dri3_ext.core = dri2_dpy->core;
   dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver;
@@ -1370,6 +1367,7 @@ dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
      xcb_disconnect(dri2_dpy->conn);
 cleanup_dpy:
   free(dri2_dpy);
+   disp->DriverData = NULL;

   return EGL_FALSE;
 }
@@ -1467,6 +1465,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
      xcb_disconnect(dri2_dpy->conn);
 cleanup_dpy:
   free(dri2_dpy);
+   disp->DriverData = NULL;

   return EGL_FALSE;
 }
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -103,6 +103,17 @@ egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
   return dri2_ctx->dri_context;
 }

+static __DRIscreen *
+egl_dri3_get_dri_screen(struct loader_dri3_drawable *draw)
+{
+   _EGLContext *ctx = _eglGetCurrentContext();
+   struct dri2_egl_context *dri2_ctx;
+   if (!ctx)
+      return NULL;
+   dri2_ctx = dri2_egl_context(ctx);
+   return dri2_egl_display(dri2_ctx->base.Resource.Display)->dri_screen;
+}
+
 static void
 egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
 {
@@ -119,6 +130,7 @@ static struct loader_dri3_vtable egl_dri3_vtable = {
   .set_drawable_size = egl_dri3_set_drawable_size,
   .in_current_context = egl_dri3_in_current_context,
   .get_dri_context = egl_dri3_get_dri_context,
+   .get_dri_screen = egl_dri3_get_dri_screen,
   .flush_drawable = egl_dri3_flush_drawable,
   .show_fps = NULL,
 };
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -627,7 +627,9 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list,

   _EGL_CHECK_DISPLAY(disp, EGL_NO_CONTEXT, drv);

-   if (!config && !disp->Extensions.MESA_configless_context)
+   if (config)
+      _EGL_CHECK_CONFIG(disp, conf, EGL_NO_CONTEXT, drv);
+   else if (!disp->Extensions.MESA_configless_context)
      RETURN_EGL_ERROR(disp, EGL_BAD_CONFIG, EGL_NO_CONTEXT);

   if (!share && share_list != EGL_NO_CONTEXT)
@@ -1937,7 +1939,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context,
   return MESA_GLINTEROP_SUCCESS;
 }

-int
+PUBLIC int
 MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
                                struct mesa_glinterop_device_info *out)
 {
@@ -1959,7 +1961,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
   return ret;
 }

-int
+PUBLIC int
 MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
                             struct mesa_glinterop_export_in *in,
                             struct mesa_glinterop_export_out *out)
--- a/src/egl/main/egldefines.h
+++ b/src/egl/main/egldefines.h
@@ -34,6 +34,8 @@
 #ifndef EGLDEFINES_INCLUDED
 #define EGLDEFINES_INCLUDED

+#include "util/macros.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -48,7 +50,6 @@ extern "C" {

 #define _EGL_VENDOR_STRING "Mesa Project"

-#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
 #define MIN2(A, B)  (((A) < (B)) ? (A) : (B))

 #ifdef __cplusplus
--- a/src/egl/main/eglglobals.c
+++ b/src/egl/main/eglglobals.c
@@ -53,10 +53,16 @@ struct _egl_global _eglGlobal =
   /* ClientExtensionsString */
   "EGL_EXT_client_extensions"
   " EGL_EXT_platform_base"
+#ifdef HAVE_WAYLAND_PLATFORM
   " EGL_EXT_platform_wayland"
+#endif
+#ifdef HAVE_X11_PLATFORM
   " EGL_EXT_platform_x11"
-   " EGL_KHR_client_get_all_proc_addresses"
+#endif
+#ifdef HAVE_DRM_PLATFORM
   " EGL_MESA_platform_gbm"
+#endif
+   " EGL_KHR_client_get_all_proc_addresses"
 };


--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -262,9 +262,13 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
 {
   const char *func;
   EGLint renderBuffer = EGL_BACK_BUFFER;
-   EGLint swapBehavior = EGL_BUFFER_PRESERVED;
+   EGLint swapBehavior = EGL_BUFFER_DESTROYED;
   EGLint err;

+   /* Swap behavior can be preserved only if config supports this. */
+   if (conf->SurfaceType & EGL_SWAP_BEHAVIOR_PRESERVED_BIT)
+      swapBehavior = EGL_BUFFER_PRESERVED;
+
   switch (type) {
   case EGL_WINDOW_BIT:
      func = "eglCreateWindowSurface";
--- a/src/egl/main/eglsync.c
+++ b/src/egl/main/eglsync.c
@@ -26,6 +26,7 @@
 **************************************************************************/


+#include <inttypes.h>
 #include <string.h>

 #include "eglsync.h"
@@ -75,8 +76,8 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list)
      return EGL_SUCCESS;

   for (i = 0; attrib_list[i] != EGL_NONE; i++) {
-      EGLint attr = attrib_list[i++];
-      EGLint val = attrib_list[i];
+      EGLAttrib attr = attrib_list[i++];
+      EGLAttrib val = attrib_list[i];

      switch (attr) {
      case EGL_CL_EVENT_HANDLE_KHR:
@@ -92,7 +93,7 @@ _eglParseSyncAttribList64(_EGLSync *sync, const EGLAttrib *attrib_list)
      }

      if (err != EGL_SUCCESS) {
-         _eglLog(_EGL_DEBUG, "bad sync attribute 0x%04x", attr);
+         _eglLog(_EGL_DEBUG, "bad sync attribute 0x%" PRIxPTR, attr);
         break;
      }
   }
--- a/src/egl/wayland/wayland-egl/wayland-egl-priv.h
+++ b/src/egl/wayland/wayland-egl/wayland-egl-priv.h
@@ -27,6 +27,7 @@ struct wl_egl_window {

 	void *private;
 	void (*resize_callback)(struct wl_egl_window *, void *);
+	void (*destroy_window_callback)(void *);
 };

 #ifdef  __cplusplus
--- a/src/egl/wayland/wayland-egl/wayland-egl.c
+++ b/src/egl/wayland/wayland-egl/wayland-egl.c
@@ -66,6 +66,7 @@ wl_egl_window_create(struct wl_surface *surface,
 	egl_window->surface = surface;
 	egl_window->private = NULL;
 	egl_window->resize_callback = NULL;
+	egl_window->destroy_window_callback = NULL;
 	wl_egl_window_resize(egl_window, width, height, 0, 0);
 	egl_window->attached_width  = 0;
 	egl_window->attached_height = 0;
@@ -76,6 +77,8 @@ wl_egl_window_create(struct wl_surface *surface,
 WL_EGL_EXPORT void
 wl_egl_window_destroy(struct wl_egl_window *egl_window)
 {
+	if (egl_window->destroy_window_callback)
+		egl_window->destroy_window_callback(egl_window->private);
 	free(egl_window);
 }

--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -310,7 +310,8 @@ C_SOURCES := \
 	util/u_upload_mgr.h \
 	util/u_vbuf.c \
 	util/u_vbuf.h \
-	util/u_video.h
+	util/u_video.h \
+	util/u_viewport.h

 NIR_SOURCES := \
 	nir/tgsi_to_nir.c \
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -45,6 +45,7 @@ struct pipe_loader_sw_device {
   struct util_dl_library *lib;
 #endif
   struct sw_winsys *ws;
+   int fd;
 };

 #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)
@@ -92,6 +93,7 @@ pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
   sdev->base.driver_name = "swrast";
   sdev->base.ops = &pipe_loader_sw_ops;
+   sdev->fd = -1;

 #ifdef GALLIUM_STATIC_TARGETS
   sdev->dd = &driver_descriptors;
@@ -169,6 +171,8 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
   if (!pipe_loader_sw_probe_init_common(sdev))
      goto fail;

+   sdev->fd = fd;
+
   for (i = 0; sdev->dd->winsys[i].name; i++) {
      if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
         sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
@@ -273,6 +277,11 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
      util_dl_close(sdev->lib);
 #endif

+#ifdef HAVE_PIPE_LOADER_KMS
+   if (sdev->fd != -1)
+      close(sdev->fd);
+#endif
+
   FREE(sdev);
   *dev = NULL;
 }
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -140,11 +140,15 @@ static inline void
 u_box_union_2d(struct pipe_box *dst,
               const struct pipe_box *a, const struct pipe_box *b)
 {
-   dst->x = MIN2(a->x, b->x);
-   dst->y = MIN2(a->y, b->y);
+   int x, y;

-   dst->width = MAX2(a->x + a->width, b->x + b->width) - dst->x;
-   dst->height = MAX2(a->y + a->height, b->y + b->height) - dst->y;
+   x = MIN2(a->x, b->x);
+   y = MIN2(a->y, b->y);
+
+   dst->width = MAX2(a->x + a->width, b->x + b->width) - x;
+   dst->height = MAX2(a->y + a->height, b->y + b->height) - y;
+   dst->x = x;
+   dst->y = y;
 }

 /* Aliasing of @dst permitted. */
@@ -152,13 +156,18 @@ static inline void
 u_box_union_3d(struct pipe_box *dst,
               const struct pipe_box *a, const struct pipe_box *b)
 {
-   dst->x = MIN2(a->x, b->x);
-   dst->y = MIN2(a->y, b->y);
-   dst->z = MIN2(a->z, b->z);
+   int x, y, z;

-   dst->width = MAX2(a->x + a->width, b->x + b->width) - dst->x;
-   dst->height = MAX2(a->y + a->height, b->y + b->height) - dst->y;
-   dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - dst->z;
+   x = MIN2(a->x, b->x);
+   y = MIN2(a->y, b->y);
+   z = MIN2(a->z, b->z);
+
+   dst->width = MAX2(a->x + a->width, b->x + b->width) - x;
+   dst->height = MAX2(a->y + a->height, b->y + b->height) - y;
+   dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - z;
+   dst->x = x;
+   dst->y = y;
+   dst->z = z;
 }

 static inline boolean
--- a/src/gallium/auxiliary/util/u_format_r11g11b10f.h
+++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
@@ -194,7 +194,7 @@ static inline float uf10_to_f32(uint16_t val)

   if (exponent == 0) {
      if (mantissa != 0) {
-         const float scale = 1.0 / (1 << 20);
+         const float scale = 1.0 / (1 << 19);
         f32.f = scale * mantissa;
      }
   }
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -829,7 +829,7 @@ align(int value, int alignment)
 static inline uint64_t
 align64(uint64_t value, unsigned alignment)
 {
-   return (value + alignment - 1) & ~(alignment - 1);
+   return (value + alignment - 1) & ~((uint64_t)alignment - 1);
 }

 /**
--- a/src/gallium/auxiliary/util/u_viewport.h
+++ b/src/gallium/auxiliary/util/u_viewport.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2016 Ilia Mirkin.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_VIEWPORT_H
+#define U_VIEWPORT_H
+
+#include "c99_compat.h"
+#include "pipe/p_state.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+util_viewport_zmin_zmax(const struct pipe_viewport_state *vp, bool halfz,
+                        float *zmin, float *zmax)
+{
+   float a, b;
+   if (halfz) {
+      a = vp->translate[2];
+      b = vp->translate[2] + vp->scale[2];
+   } else {
+      a = vp->translate[2] - vp->scale[2];
+      b = vp->translate[2] + vp->scale[2];
+   }
+
+   *zmin = a < b ? a : b;
+   *zmax = a < b ? b : a;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -89,6 +89,7 @@ dri3_free_front_buffer(struct vl_dri3_screen *scrn,
 {
   xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
   xshmfence_unmap_shm(buffer->shm_fence);
+   pipe_resource_reference(&buffer->texture, NULL);
   FREE(buffer);
 }

--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -26,6 +26,7 @@
 **************************************************************************/

 #include <assert.h>
+#include <fcntl.h>

 #include "pipe/p_screen.h"
 #include "pipe-loader/pipe_loader.h"
@@ -47,7 +48,7 @@ vl_drm_screen_create(int fd)
   if (!vscreen)
      return NULL;

-   if (fd < 0 || (new_fd = dup(fd)) < 0)
+   if (fd < 0 || (new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3)) < 0)
      goto free_screen;

   if (pipe_loader_drm_probe_fd(&vscreen->dev, new_fd))
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -1472,7 +1472,7 @@ static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
 {
 	return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
 }
-#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE				0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE			0x00000080
 #define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE			0x80000000

 #define REG_A3XX_RB_DEPTH_CLEAR					0x00002101
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -158,6 +158,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 		.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
 	};

+	if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer))
+		emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
+
 	fixup_shader_state(ctx, &emit.key);

 	unsigned dirty = ctx->dirty;
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
 #include "util/u_format.h"
+#include "util/u_viewport.h"

 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
@@ -529,7 +530,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
 	}

-	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
 		uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
 		if (fp->writes_pos) {
 			val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
@@ -538,6 +539,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		if (fp->has_kill) {
 			val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
 		}
+		if (!ctx->rasterizer->depth_clip) {
+			val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
+		}
 		OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
 		OUT_RING(ring, val);
 	}
@@ -561,20 +565,24 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
 		uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
 				->gras_cl_clip_cntl;
+		uint8_t planes = ctx->rasterizer->clip_plane_enable;
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
-		/* TODO only use if prog doesn't use clipvertex/clipdist */
-		val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
-				MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
+		if (!emit->key.ucp_enables)
+			val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+					MIN2(util_bitcount(planes), 6));
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}

-	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
 		uint32_t planes = ctx->rasterizer->clip_plane_enable;
 		int count = 0;

+		if (emit->key.ucp_enables)
+			planes = 0;
+
 		while (planes && count < 6) {
 			int i = ffs(planes) - 1;

@@ -615,19 +623,35 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, val);
 	}

-	if (dirty & FD_DIRTY_SCISSOR) {
+	if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) {
 		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+		int minx = scissor->minx;
+		int miny = scissor->miny;
+		int maxx = scissor->maxx;
+		int maxy = scissor->maxy;
+
+		/* Unfortunately there is no separate depth clip disable, only an all
+		 * or nothing deal. So when we disable clipping, we must handle the
+		 * viewport clip via scissors.
+		 */
+		if (!ctx->rasterizer->depth_clip) {
+			struct pipe_viewport_state *vp = &ctx->viewport;
+			minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0])));
+			miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1])));
+			maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0])));
+			maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1])));
+		}

 		OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
-		OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
-				A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
-		OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
-				A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
+		OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) |
+				A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny));
+		OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) |
+				A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1));

-		ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
-		ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
-		ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
-		ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
+		ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, minx);
+		ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, miny);
+		ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, maxx);
+		ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, maxy);
 	}

 	if (dirty & FD_DIRTY_VIEWPORT) {
@@ -641,6 +665,30 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
 	}

+	if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+		float zmin, zmax;
+		int depth = 24;
+		if (ctx->framebuffer.zsbuf) {
+			depth = util_format_get_component_bits(
+					pipe_surface_format(ctx->framebuffer.zsbuf),
+					UTIL_FORMAT_COLORSPACE_ZS, 0);
+		}
+		util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+								&zmin, &zmax);
+
+		OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
+		if (depth == 32) {
+			OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
+			OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
+		} else if (depth == 16) {
+			OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+			OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+		} else {
+			OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+			OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+		}
+	}
+
 	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
 		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 		int nr_cbufs = pfb->nr_cbufs;
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -28,6 +28,7 @@

 #include "pipe/p_state.h"
 #include "util/u_string.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
@@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
 	delete_shader_stateobj(so);
 }

+bool
+fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so,
+						  const struct pipe_rasterizer_state *rast)
+{
+	uint64_t outputs = ir3_shader_outputs(so->shader);
+
+	return (!rast->depth_clip ||
+			util_bitcount(rast->clip_plane_enable) > 6 ||
+			outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
+					   (1ULL << VARYING_SLOT_CLIP_DIST0) |
+					   (1ULL << VARYING_SLOT_CLIP_DIST1)));
+}
+
+
 static void
 emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
 {
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,

 void fd3_prog_init(struct pipe_context *pctx);

+bool fd3_needs_manual_clipping(const struct fd3_shader_stateobj *,
+							   const struct pipe_rasterizer_state *);
+
 #endif /* FD3_PROGRAM_H_ */
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -1376,7 +1376,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
 {
 	return ((val) << A4XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A4XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
 }
-#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE				0x00000080
+#define A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE			0x00000080
 #define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE			0x00010000
 #define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS			0x00020000
 #define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE			0x80000000
@@ -3145,6 +3145,8 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)

 #define REG_A4XX_GRAS_CL_CLIP_CNTL				0x00002000
 #define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE			0x00008000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE		0x00010000
+#define A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE		0x00020000
 #define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z			0x00400000

 #define REG_A4XX_GRAS_CLEAR_CNTL				0x00002003
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
 #include "util/u_format.h"
+#include "util/u_viewport.h"

 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
@@ -544,12 +545,14 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
 	}

-	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
 		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
 		bool fragz = fp->has_kill | fp->writes_pos;
+		bool clamp = !ctx->rasterizer->depth_clip;

 		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
 		OUT_RING(ring, zsa->rb_depth_control |
+				COND(clamp, A4XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE) |
 				COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
 				COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));

@@ -636,6 +639,30 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
 	}

+	if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+		float zmin, zmax;
+		int depth = 24;
+		if (ctx->framebuffer.zsbuf) {
+			depth = util_format_get_component_bits(
+					pipe_surface_format(ctx->framebuffer.zsbuf),
+					UTIL_FORMAT_COLORSPACE_ZS, 0);
+		}
+		util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
+								&zmin, &zmax);
+
+		OUT_PKT0(ring, REG_A4XX_RB_VPORT_Z_CLAMP(0), 2);
+		if (depth == 32) {
+			OUT_RING(ring, fui(zmin));
+			OUT_RING(ring, fui(zmax));
+		} else if (depth == 16) {
+			OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+			OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+		} else {
+			OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+			OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+		}
+	}
+
 	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) {
 		struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 		unsigned n = pfb->nr_cbufs;
--- a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
@@ -98,7 +98,8 @@ fd4_rasterizer_state_create(struct pipe_context *pctx,
 		so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;

 	if (!cso->depth_clip)
-		so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE;
+		so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+			A4XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
 	if (cso->clip_halfz)
 		so->gras_cl_clip_cntl |= A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;

--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -469,6 +469,12 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin)
 	debug_printf("\n");
 }

+uint64_t
+ir3_shader_outputs(const struct ir3_shader *so)
+{
+	return so->nir->info.outputs_written;
+}
+
 /* This has to reach into the fd_context a bit more than the rest of
 * ir3, but it needs to be aligned with the compiler, so both agree
 * on which const regs hold what.  And the logic is identical between
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -272,6 +272,7 @@ void ir3_shader_destroy(struct ir3_shader *shader);
 struct ir3_shader_variant * ir3_shader_variant(struct ir3_shader *shader,
 		struct ir3_shader_key key, struct pipe_debug_callback *debug);
 void ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin);
+uint64_t ir3_shader_outputs(const struct ir3_shader *so);

 struct fd_ringbuffer;
 struct fd_context;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -726,7 +726,7 @@ void
 CodeEmitterGK110::emitIMAD(const Instruction *i)
 {
   uint8_t addOp =
-      (i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg());
+      i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);

   emitForm_21(i, 0x100, 0xa00);

@@ -773,7 +773,7 @@ CodeEmitterGK110::emitNOT(const Instruction *i)
      break;
   case FILE_MEMORY_CONST:
      code[1] |= 0x4 << 28;
-      setCAddress14(i->src(1));
+      setCAddress14(i->src(0));
      break;
   default:
      assert(0);
@@ -1321,15 +1321,12 @@ void
 CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
 {
   code[0] = 0x00000002 | ((qOp & 1) << 31);
-   code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12);
+   code[1] = 0x7fc00200 | (qOp >> 1) | (laneMask << 12); // dall

   defId(i->def(0), 2);
   srcId(i->src(0), 10);
   srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 23);

-   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
-      code[1] |= 1 << 9; // dall
-
   emitPredicate(i);
 }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -1682,7 +1682,7 @@ CodeEmitterGM107::emitNOT()
 void
 CodeEmitterGM107::emitIADD()
 {
-   if (!longIMMD(insn->src(1))) {
+   if (insn->src(1).getFile() != FILE_IMMEDIATE) {
      switch (insn->src(1).getFile()) {
      case FILE_GPR:
         emitInsn(0x5c100000);
@@ -1707,6 +1707,7 @@ CodeEmitterGM107::emitIADD()
      emitX  (0x2b);
   } else {
      emitInsn(0x1c000000);
+      emitNEG (0x38, insn->src(0));
      emitSAT (0x36);
      emitX   (0x35);
      emitCC  (0x34);
@@ -2300,6 +2301,7 @@ CodeEmitterGM107::emitAL2P()
 {
   emitInsn (0xefa00000);
   emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
+   emitPRED (0x2c);
   emitO    (0x20);
   emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
   emitGPR  (0x08, insn->src(0).getIndirect(0));
@@ -2523,7 +2525,7 @@ CodeEmitterGM107::emitTEX()

   if (insn->tex.rIndirectSrc >= 0) {
      emitInsn (0xdeb80000);
-      emitField(0x35, 2, lodm);
+      emitField(0x25, 2, lodm);
      emitField(0x24, 1, insn->tex.useOffsets == 1);
   } else {
      emitInsn (0xc0380000);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -2112,7 +2112,7 @@ makeInstructionLong(Instruction *insn)
   insn->encSize = 8;

   for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) {
-      fn->bbArray[i]->binPos += 4;
+      fn->bbArray[i]->binPos += adj;
   }
   fn->binSize += adj;
   insn->bb->binSize += adj;
@@ -2164,9 +2164,16 @@ replaceExitWithModifier(Function *func)
            return;
      }
   }
-   epilogue->binSize -= 8;
-   func->binSize -= 8;
+
+   int adj = epilogue->getExit()->encSize;
+   epilogue->binSize -= adj;
+   func->binSize -= adj;
   delete_Instruction(func->getProgram(), epilogue->getExit());
+
+   // There may be BB's that are laid out after the exit block
+   for (int i = func->bbCount - 1; i >= 0 && func->bbArray[i] != epilogue; --i) {
+      func->bbArray[i]->binPos -= adj;
+   }
 }

 void
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -736,9 +736,15 @@ CodeEmitterNVC0::emitUADD(const Instruction *i)
 void
 CodeEmitterNVC0::emitIMAD(const Instruction *i)
 {
+   uint8_t addOp =
+      i->src(2).mod.neg() | ((i->src(0).mod.neg() ^ i->src(1).mod.neg()) << 1);
+
   assert(i->encSize == 8);
   emitForm_A(i, HEX64(20000000, 00000003));

+   assert(addOp != 3);
+   code[0] |= addOp << 8;
+
   if (isSignedType(i->dType))
      code[0] |= 1 << 7;
   if (isSignedType(i->sType))
@@ -749,10 +755,6 @@ CodeEmitterNVC0::emitIMAD(const Instruction *i)
   if (i->flagsDef >= 0) code[1] |= 1 << 16;
   if (i->flagsSrc >= 0) code[1] |= 1 << 23;

-   if (i->src(2).mod.neg()) code[0] |= 0x10;
-   if (i->src(1).mod.neg() ^
-       i->src(0).mod.neg()) code[0] |= 0x20;
-
   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
      code[0] |= 1 << 6;
 }
@@ -1356,16 +1358,13 @@ CodeEmitterNVC0::emitTXQ(const TexInstruction *i)
 void
 CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask)
 {
-   code[0] = 0x00000000 | (laneMask << 6);
+   code[0] = 0x00000200 | (laneMask << 6); // dall
   code[1] = 0x48000000 | qOp;

   defId(i->def(0), 14);
   srcId(i->src(0), 20);
   srcId((i->srcExists(1) && i->predSrc != 1) ? i->src(1) : i->src(0), 26);

-   if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT)
-      code[0] |= 1 << 9; // dall
-
   emitPredicate(i);
 }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -182,6 +182,7 @@ public:

   // mask of used components of source s
   unsigned int srcMask(unsigned int s) const;
+   unsigned int texOffsetMask() const;

   SrcRegister getSrc(unsigned int s) const
   {
@@ -234,6 +235,35 @@ private:
   const struct tgsi_full_instruction *insn;
 };

+unsigned int Instruction::texOffsetMask() const  // bitmask chosen by texture target
+{
+   const struct tgsi_instruction_texture *tex = &insn->Texture;
+   assert(insn->Instruction.Texture);  // caller must pass a texture instruction
+
+   switch (tex->Texture) {
+   case TGSI_TEXTURE_BUFFER:
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+   case TGSI_TEXTURE_1D_ARRAY:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+      return 0x1;  // buffer / 1D targets: bit 0 only
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_2D_ARRAY:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_2D_MSAA:
+   case TGSI_TEXTURE_2D_ARRAY_MSAA:
+      return 0x3;  // 2D / rect / MSAA targets: bits 0-1
+   case TGSI_TEXTURE_3D:
+      return 0x7;  // 3D target: bits 0-2
+   default:  // NOTE(review): cube targets are not listed and land here - confirm intended
+      assert(!"Unexpected texture target");
+      return 0xf;  // reached only if the assert does not abort: all four bits
+   }
+}
+
 unsigned int Instruction::srcMask(unsigned int s) const
 {
   unsigned int mask = insn->Dst[0].Register.WriteMask;
@@ -942,6 +972,9 @@ private:
   int inferSysValDirection(unsigned sn) const;
   bool scanDeclaration(const struct tgsi_full_declaration *);
   bool scanInstruction(const struct tgsi_full_instruction *);
+   void scanInstructionSrc(const Instruction& insn,
+                           const Instruction::SrcRegister& src,
+                           unsigned mask);
   void scanProperty(const struct tgsi_full_property *);
   void scanImmediate(const struct tgsi_full_immediate *);

@@ -1351,6 +1384,61 @@ inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const
      insn.getSrc(0).getFile() == TGSI_FILE_INPUT;
 }

+void Source::scanInstructionSrc(const Instruction& insn,  // records what one source operand reads
+                                const Instruction::SrcRegister& src,
+                                unsigned mask)  // bit c set => component c of src is examined below
+{
+   if (src.getFile() == TGSI_FILE_TEMPORARY) {
+      if (src.isIndirect(0))
+         indirectTempArrays.insert(src.getArrayId());  // remember indirectly addressed temp arrays
+   } else
+   if (src.getFile() == TGSI_FILE_BUFFER ||
+       src.getFile() == TGSI_FILE_IMAGE ||
+       (src.getFile() == TGSI_FILE_MEMORY &&
+        memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
+      info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
+         0x1 : 0x2;  // 0x1 for LOAD, 0x2 for every other opcode
+   } else
+   if (src.getFile() == TGSI_FILE_OUTPUT) {
+      if (src.isIndirect(0)) {
+         // We don't know which one is accessed, just mark everything for
+         // reading. This is an extremely unlikely occurrence.
+         for (unsigned i = 0; i < info->numOutputs; ++i)
+            info->out[i].oread = 1;
+      } else {
+         info->out[src.getIndex(0)].oread = 1;
+      }
+   }
+   if (src.getFile() != TGSI_FILE_INPUT)  // everything below applies to inputs only
+      return;
+
+   if (src.isIndirect(0)) {
+      for (unsigned i = 0; i < info->numInputs; ++i)
+         info->in[i].mask = 0xf;  // indirect read: mark all four components of every input
+   } else {
+      const int i = src.getIndex(0);
+      for (unsigned c = 0; c < 4; ++c) {
+         if (!(mask & (1 << c)))  // component c not requested by the caller
+            continue;
+         int k = src.getSwizzle(c);
+         if (k <= TGSI_SWIZZLE_W)  // swizzle values above W set no mask bit
+            info->in[i].mask |= 1 << k;
+      }
+      switch (info->in[i].sn) {  // trim the mask for the semantics listed below
+      case TGSI_SEMANTIC_PSIZE:
+      case TGSI_SEMANTIC_PRIMID:
+      case TGSI_SEMANTIC_FOG:
+         info->in[i].mask &= 0x1;  // keep bit 0 only
+         break;
+      case TGSI_SEMANTIC_PCOORD:
+         info->in[i].mask &= 0x3;  // keep bits 0-1 only
+         break;
+      default:
+         break;
+      }
+   }
+}
+
 bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
 {
   Instruction insn(inst);
@@ -1383,66 +1471,19 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
            indirectTempArrays.insert(dst.getArrayId());
      } else
      if (dst.getFile() == TGSI_FILE_BUFFER ||
-          dst.getFile() == TGSI_FILE_IMAGE || 
+          dst.getFile() == TGSI_FILE_IMAGE ||
          (dst.getFile() == TGSI_FILE_MEMORY &&
           memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
         info->io.globalAccess |= 0x2;
      }
   }

-   for (unsigned s = 0; s < insn.srcCount(); ++s) {
-      Instruction::SrcRegister src = insn.getSrc(s);
-      if (src.getFile() == TGSI_FILE_TEMPORARY) {
-         if (src.isIndirect(0))
-            indirectTempArrays.insert(src.getArrayId());
-      } else
-      if (src.getFile() == TGSI_FILE_BUFFER ||
-          src.getFile() == TGSI_FILE_IMAGE ||
-          (src.getFile() == TGSI_FILE_MEMORY &&
-           memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
-         info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
-               0x1 : 0x2;
-      } else
-      if (src.getFile() == TGSI_FILE_OUTPUT) {
-         if (src.isIndirect(0)) {
-            // We don't know which one is accessed, just mark everything for
-            // reading. This is an extremely unlikely occurrence.
-            for (unsigned i = 0; i < info->numOutputs; ++i)
-               info->out[i].oread = 1;
-         } else {
-            info->out[src.getIndex(0)].oread = 1;
-         }
-      }
-      if (src.getFile() != TGSI_FILE_INPUT)
-         continue;
-      unsigned mask = insn.srcMask(s);
+   for (unsigned s = 0; s < insn.srcCount(); ++s)
+      scanInstructionSrc(insn, insn.getSrc(s), insn.srcMask(s));
+
+   for (unsigned s = 0; s < insn.getNumTexOffsets(); ++s)
+      scanInstructionSrc(insn, insn.getTexOffset(s), insn.texOffsetMask());

-      if (src.isIndirect(0)) {
-         for (unsigned i = 0; i < info->numInputs; ++i)
-            info->in[i].mask = 0xf;
-      } else {
-         const int i = src.getIndex(0);
-         for (unsigned c = 0; c < 4; ++c) {
-            if (!(mask & (1 << c)))
-               continue;
-            int k = src.getSwizzle(c);
-            if (k <= TGSI_SWIZZLE_W)
-               info->in[i].mask |= 1 << k;
-         }
-         switch (info->in[i].sn) {
-         case TGSI_SEMANTIC_PSIZE:
-         case TGSI_SEMANTIC_PRIMID:
-         case TGSI_SEMANTIC_FOG:
-            info->in[i].mask &= 0x1;
-            break;
-         case TGSI_SEMANTIC_PCOORD:
-            info->in[i].mask &= 0x3;
-            break;
-         default:
-            break;
-         }
-      }
-   }
   return true;
 }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp
@@ -287,7 +287,10 @@ private:

      bb.push(node);

-      while (bb.getSize()) {
+      while (bb.getSize() || cross.getSize()) {
+         if (bb.getSize() == 0)
+            cross.moveTo(bb);
+
         node = reinterpret_cast<Graph::Node *>(bb.pop().u.p);
         assert(node);
         if (!node->visit(sequence))
@@ -314,9 +317,6 @@ private:
            }
         }
         nodes[count++] = node;
-
-         if (bb.getSize() == 0)
-            cross.moveTo(bb);
      }
   }

--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -750,6 +750,16 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
         i->tex.rIndirectSrc = 0;
         i->tex.sIndirectSrc = -1;
      }
+      // Move the indirect reference to right after the coords
+      else if (i->tex.rIndirectSrc >= 0 && chipset >= NVISA_GM107_CHIPSET) {
+         Value *hnd = i->getIndirectR();
+
+         i->setIndirectR(NULL);
+         i->moveSources(arg, 1);
+         i->setSrc(arg, hnd);
+         i->tex.rIndirectSrc = 0;
+         i->tex.sIndirectSrc = -1;
+      }
   } else
   // (nvc0) generate and move the tsc/tic/array source to the front
   if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
@@ -823,7 +833,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
         for (n = 0; n < i->tex.useOffsets; n++) {
            for (c = 0; c < 2; ++c) {
               if ((n % 2) == 0 && c == 0)
-                  offs[n / 2] = i->offset[n][c].get();
+                  bld.mkMov(offs[n / 2] = bld.getScratch(), i->offset[n][c].get());
               else
                  bld.mkOp3(OP_INSBF, TYPE_U32,
                            offs[n / 2],
@@ -2056,6 +2066,13 @@ NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
      base = 0;
   }

+   if (ind) {
+      Value *ptr;
+      ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ind, bld.mkImm(su->tex.r));
+      ptr = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(7));
+      su->setIndirectR(ptr);
+   }
+
   // get surface coordinates
   for (c = 0; c < arg; ++c)
      src[c] = su->getSrc(c);
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1902,8 +1902,10 @@ GCRA::resolveSplitsAndMerges()
         // their registers should be identical.
         if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) {
            Instruction *phi = v->getInsn();
-            for (int phis = 0; phi->srcExists(phis); ++phis)
+            for (int phis = 0; phi->srcExists(phis); ++phis) {
               phi->getSrc(phis)->join = v;
+               phi->getSrc(phis)->reg.data.id = v->reg.data.id;
+            }
         }
         reg += v->reg.size;
      }
--- a/src/gallium/drivers/nouveau/nv30/nv30_state.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_state.c
@@ -379,8 +379,9 @@ nv30_set_framebuffer_state(struct pipe_context *pipe,
       struct nv30_miptree *zeta_mt = nv30_miptree(fb->zsbuf->texture);

       if (color_mt->swizzled != zeta_mt->swizzled ||
-           (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
-           (util_format_get_blocksize(fb->cbufs[0]->format) > 2)) {
+           (color_mt->swizzled &&
+            (util_format_get_blocksize(fb->zsbuf->format) > 2) !=
+            (util_format_get_blocksize(fb->cbufs[0]->format) > 2))) {
          nv30->framebuffer.zsbuf = NULL;
          debug_printf("Mismatched color and zeta formats, ignoring zeta.\n");
       }
--- a/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_transfer.c
@@ -115,7 +115,8 @@ nv30_transfer_rect_fragprog(struct nv30_context *nv30)
   struct pipe_context *pipe = &nv30->base.pipe;

   if (!fp) {
-      nv30->blit_fp = pipe_buffer_create(pipe->screen, 0, 0, 12 * 4);
+      nv30->blit_fp =
+         pipe_buffer_create(pipe->screen, 0, PIPE_USAGE_STAGING, 12 * 4);
      if (nv30->blit_fp) {
         struct pipe_transfer *transfer;
         u32 *map = pipe_buffer_map(pipe, nv30->blit_fp,
--- a/src/gallium/drivers/nouveau/nv50/nv50_formats.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_formats.c
@@ -161,7 +161,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
   F3(A, R11G11B10_FLOAT, R11G11B10_FLOAT, R, G, B, xx, FLOAT, BF10GF11RF11, IB),

   F3(A, L8_UNORM, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
-   F3(A, L8_SRGB, R8_UNORM, R, R, R, xx, UNORM, R8, TB),
+   F3(A, L8_SRGB, NONE, R, R, R, xx, UNORM, R8, T),
   F3(A, L8_SNORM, R8_SNORM, R, R, R, xx, SNORM, R8, TC),
   I3(A, L8_SINT, R8_SINT, R, R, R, xx, SINT, R8, TR),
   I3(A, L8_UINT, R8_UINT, R, R, R, xx, UINT, R8, TR),
@@ -203,7 +203,7 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
   C4(A, L4A4_UNORM, NONE, R, R, R, G, UNORM, G4R4, T),
   C4(A, L8A8_UNORM, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
   C4(A, L8A8_SNORM, RG8_SNORM, R, R, R, G, SNORM, G8R8, T),
-   C4(A, L8A8_SRGB, RG8_UNORM, R, R, R, G, UNORM, G8R8, T),
+   C4(A, L8A8_SRGB, NONE, R, R, R, G, UNORM, G8R8, T),
   C4(A, L8A8_SINT, RG8_SINT, R, R, R, G, SINT, G8R8, T),
   C4(A, L8A8_UINT, RG8_UINT, R, R, R, G, UINT, G8R8, T),
   C4(A, L16A16_UNORM, RG16_UNORM, R, R, R, G, UNORM, R16_G16, T),
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -307,6 +307,9 @@ nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
      const unsigned r = pso->output[i].register_index;
      b = pso->output[i].output_buffer;

+      if (r >= info->numOutputs)
+         continue;
+
      for (c = 0; c < pso->output[i].num_components; ++c)
         so->map[base[b] + p + c] = info->out[r].slot[s + c];
   }
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -1,5 +1,6 @@

 #include "util/u_format.h"
+#include "util/u_viewport.h"

 #include "nv50/nv50_context.h"

@@ -265,8 +266,12 @@ nv50_validate_viewport(struct nv50_context *nv50)
      PUSH_DATAf(push, vpt->scale[1]);
      PUSH_DATAf(push, vpt->scale[2]);

-      zmin = vpt->translate[2] - fabsf(vpt->scale[2]);
-      zmax = vpt->translate[2] + fabsf(vpt->scale[2]);
+      /* If the halfz setting ever changes, the viewports will also get
+       * updated. The rast will get updated before the validate function has a
+       * chance to hit, so we can just use it directly without an atom
+       * dependency.
+       */
+      util_viewport_zmin_zmax(vpt, nv50->rast->pipe.clip_halfz, &zmin, &zmax);

 #ifdef NV50_SCISSORS_CLIPPING
      BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -499,10 +499,8 @@ nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
   NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
 }

-static void
-nvc0_context_get_sample_position(struct pipe_context *pipe,
-                                 unsigned sample_count, unsigned sample_index,
-                                 float *xy)
+const void *
+nvc0_get_sample_locations(unsigned sample_count)
 {
   static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
   static const uint8_t ms2[2][2] = {
@@ -534,8 +532,22 @@ nvc0_context_get_sample_position(struct pipe_context *pipe,
   case 8: ptr = ms8; break;
   default:
      assert(0);
-      return; /* bad sample count -> undefined locations */
+      return NULL; /* bad sample count -> undefined locations */
   }
+   return ptr;
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *pipe,
+                                 unsigned sample_count, unsigned sample_index,
+                                 float *xy) /* out: xy[0], xy[1] */
+{
+   const uint8_t (*ptr)[2]; /* rows of two bytes, indexed by sample */
+
+   ptr = nvc0_get_sample_locations(sample_count);
+   if (!ptr) /* no table for this sample count: xy is left unwritten */
+      return;
+
   xy[0] = ptr[sample_index][0] * 0.0625f; /* 0.0625 = 1/16 */
   xy[1] = ptr[sample_index][1] * 0.0625f;
 }
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -278,6 +278,7 @@ struct pipe_context *nvc0_create(struct pipe_screen *, void *, unsigned flags);
 void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
                       bool on_flush);
 void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+const void *nvc0_get_sample_locations(unsigned);

 /* nvc0_draw.c */
 extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -500,11 +500,14 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
   for (i = 0; i < pso->num_outputs; ++i) {
      unsigned s = pso->output[i].start_component;
      unsigned p = pso->output[i].dst_offset;
+      const unsigned r = pso->output[i].register_index;
      b = pso->output[i].output_buffer;

+      if (r >= info->numOutputs)
+         continue;
+
      for (c = 0; c < pso->output[i].num_components; ++c)
-         tfb->varying_index[b][p++] =
-            info->out[pso->output[i].register_index].slot[s + c];
+         tfb->varying_index[b][p++] = info->out[r].slot[s + c];

      tfb->varying_count[b] = MAX2(tfb->varying_count[b], p);
      tfb->stream[b] = pso->output[i].stream;
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -2,6 +2,7 @@
 #include "util/u_format.h"
 #include "util/u_framebuffer.h"
 #include "util/u_math.h"
+#include "util/u_viewport.h"

 #include "nvc0/nvc0_context.h"

@@ -211,6 +212,19 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
       PUSH_DATAf(push, xy[1]);
    }

+   if (screen->base.class_3d >= GM200_3D_CLASS) {
+      const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
+      uint32_t val[4] = {};
+
+      for (i = 0; i < 16; i++) {
+         val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
+         val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
+      }
+
+      BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
+      PUSH_DATAp(push, val, 4);
+   }
+
    if (serialize)
       IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);

@@ -316,8 +330,12 @@ nvc0_validate_viewport(struct nvc0_context *nvc0)
      PUSH_DATA (push, (w << 16) | x);
      PUSH_DATA (push, (h << 16) | y);

-      zmin = vp->translate[2] - fabsf(vp->scale[2]);
-      zmax = vp->translate[2] + fabsf(vp->scale[2]);
+      /* If the halfz setting ever changes, the viewports will also get
+       * updated. The rast will get updated before the validate function has a
+       * chance to hit, so we can just use it directly without an atom
+       * dependency.
+       */
+      util_viewport_zmin_zmax(vp, nvc0->rast->pipe.clip_halfz, &zmin, &zmax);

      BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(i)), 2);
      PUSH_DATAf(push, zmin);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -589,13 +589,11 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

-   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
-      /* Invalidate all CP textures because they are aliased. */
-      for (int i = 0; i < nvc0->num_textures[5]; i++)
-         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CP_TEX(i));
-      nvc0->textures_dirty[5] = ~0;
-      nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
-   }
+   /* Invalidate all CP textures because they are aliased. */
+   for (int i = 0; i < nvc0->num_textures[5]; i++)
+      nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
+   nvc0->textures_dirty[5] = ~0;
+   nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
 }

 bool
@@ -709,11 +707,9 @@ void nvc0_validate_samplers(struct nvc0_context *nvc0)
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }

-   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS) {
-      /* Invalidate all CP samplers because they are aliased. */
-      nvc0->samplers_dirty[5] = ~0;
-      nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
-   }
+   /* Invalidate all CP samplers because they are aliased. */
+   nvc0->samplers_dirty[5] = ~0;
+   nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
 }

 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -835,7 +835,7 @@ nvc0_draw_indirect(struct nvc0_context *nvc0, const struct pipe_draw_info *info)

   /* Queue things up to let the macros write params to the driver constbuf */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-   PUSH_DATA (push, 512);
+   PUSH_DATA (push, 2048);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
   BEGIN_NVC0(push, NVC0_3D(CB_POS), 1);
@@ -979,7 +979,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
   if (nvc0->vertprog->vp.need_draw_parameters) {
      PUSH_SPACE(push, 9);
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
-      PUSH_DATA (push, 512);
+      PUSH_DATA (push, 2048);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(0));
      if (!info->indirect) {
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -237,7 +237,13 @@ nve4_compute_validate_samplers(struct nvc0_context *nvc0)
      BEGIN_NVC0(nvc0->base.pushbuf, NVE4_CP(TSC_FLUSH), 1);
      PUSH_DATA (nvc0->base.pushbuf, 0);
   }
+
+   /* Invalidate all 3D samplers because they are aliased. */
+   for (int s = 0; s < 5; s++)
+      nvc0->samplers_dirty[s] = ~0;
+   nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLERS;
 }
+
 /* (Code duplicated at bottom for various non-convincing reasons.
 *  E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
 *  entries to avoid a subchannel switch.
@@ -686,6 +692,14 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+   /* Invalidate all 3D textures because they are aliased. */
+   for (int s = 0; s < 5; s++) {
+      for (int i = 0; i < nvc0->num_textures[s]; i++)
+         nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TEX(s, i));
+      nvc0->textures_dirty[s] = ~0;
+   }
+   nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
 }


--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -190,7 +190,7 @@ static boolean r300_setup_atoms(struct r300_context* r300)
    /* VAP. */
    R300_INIT_ATOM(viewport_state, 9);
    R300_INIT_ATOM(pvs_flush, 2);
-    R300_INIT_ATOM(vap_invariant_state, is_r500 ? 11 : 9);
+    R300_INIT_ATOM(vap_invariant_state, is_r500 || !has_tcl ? 11 : 9);
    R300_INIT_ATOM(vertex_stream_state, 0);
    R300_INIT_ATOM(vs_state, 0);
    R300_INIT_ATOM(vs_constants, 0);
@@ -314,6 +314,14 @@ static void r300_init_states(struct pipe_context *pipe)

        if (r300->screen->caps.is_r500) {
            OUT_CB_REG(R500_VAP_TEX_TO_COLOR_CNTL, 0);
+        } else if (!r300->screen->caps.has_tcl) {
+            /* RSxxx:
+             * Static VAP setup since r300_emit_vs_state() is never called.
+             */
+            OUT_CB_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) |
+                                      R300_PVS_NUM_CNTLRS(5) |
+                                      R300_PVS_NUM_FPUS(2) |
+                                      R300_PVS_VF_MAX_VTX_NUM(5));
        }
        END_CB;
    }
--- a/src/gallium/drivers/radeon/cayman_msaa.c
+++ b/src/gallium/drivers/radeon/cayman_msaa.c
@@ -143,6 +143,13 @@ void cayman_init_msaa(struct pipe_context *ctx)
 void cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples)
 {
 	switch (nr_samples) {
+	default:
+	case 1:
+		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 0);
+		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 0);
+		radeon_set_context_reg(cs, CM_R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 0);
+		radeon_set_context_reg(cs, CM_R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 0);
+		break;
 	case 2:
 		radeon_set_context_reg(cs, CM_R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, eg_sample_locs_2x[0]);
 		radeon_set_context_reg(cs, CM_R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, eg_sample_locs_2x[1]);
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -703,8 +703,9 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
 	}

 	rtex->cmask_buffer = (struct r600_resource *)
-		pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
-				   PIPE_USAGE_DEFAULT, rtex->cmask.size);
+		r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
+					   rtex->cmask.size,
+					   rtex->cmask.alignment);
 	if (rtex->cmask_buffer == NULL) {
 		rtex->cmask.size = 0;
 		return;
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -513,6 +513,16 @@ void radeon_llvm_emit_store(
 	}
 }

+/* Emit a branch to the given default target at the builder's current insert
+ * block, but only if that block has no terminator yet -- for example, it
+ * may already end in a branch emitted for a break or continue.
+ */
+static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
+{
+	if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder))) /* no terminator yet */
+		 LLVMBuildBr(builder, target);
+}
+
 static void bgnloop_emit(
 	const struct lp_build_tgsi_action * action,
 	struct lp_build_tgsi_context * bld_base,
@@ -577,28 +587,8 @@ static void else_emit(
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
-	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);

-	/* We need to add a terminator to the current block if the previous
-	 * instruction was an ENDIF.Example:
-	 * IF
-	 *   [code]
-	 *   IF
-	 *     [code]
-	 *   ELSE
-	 *    [code]
-	 *   ENDIF <--
-	 * ELSE<--
-	 *   [code]
-	 * ENDIF
-	 */
-
-	if (current_block != current_branch->if_block) {
-		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-	}
-	if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
-		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-	}
+	emit_default_branch(gallivm->builder, current_branch->endif_block);
 	current_branch->has_else = 1;
 	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
 }
@@ -611,26 +601,15 @@ static void endif_emit(
 	struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	struct radeon_llvm_branch * current_branch = get_current_branch(ctx);
-	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);

-	/* If we have consecutive ENDIF instructions, then the first ENDIF
-	 * will not have a terminator, so we need to add one. */
-	if (current_block != current_branch->if_block
-			&& current_block != current_branch->else_block
-			&& !LLVMGetBasicBlockTerminator(current_block)) {
+	emit_default_branch(gallivm->builder, current_branch->endif_block);

-		 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-	}
+	/* Need to fixup an empty else block if there was no ELSE opcode. */
 	if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
 		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
 		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
 	}

-	if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
-		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
-		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-	}
-
 	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
 	ctx->branch_depth--;
 }
@@ -644,9 +623,7 @@ static void endloop_emit(
 	struct gallivm_state * gallivm = bld_base->base.gallivm;
 	struct radeon_llvm_loop * current_loop = get_current_loop(ctx);

-	if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
-		 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
-	}
+	emit_default_branch(gallivm->builder, current_loop->loop_block);

 	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
 	ctx->loop_depth--;
--- a/src/gallium/drivers/radeonsi/cik_sdma.c
+++ b/src/gallium/drivers/radeonsi/cik_sdma.c
@@ -519,6 +519,12 @@ static void cik_sdma_copy(struct pipe_context *ctx,
 		return;
 	}

+	/* Carrizo SDMA texture copying is very broken for some users.
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=97029
+	 */
+	if (sctx->b.family == CHIP_CARRIZO)
+		goto fallback;
+
 	if (cik_sdma_copy_texture(sctx, dst, dst_level, dstx, dsty, dstz,
 				  src, src_level, src_box))
 		return;
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .0.1
 .0.4