docs: Update 11.1.0 release notes

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Update version to 11.1.0(final)
2015-12-15 14:49:25 +00:00 · 2015-12-14 12:20:18 +00:00 · 2015-12-12 19:39:03 +00:00 · 2015-12-12 19:39:03 +00:00 · 2015-12-12 19:39:03 +00:00 · 2015-12-12 19:39:03 +00:00
1707 changed files with 27364 additions and 106426 deletions
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -5,7 +5,6 @@
  (c-file-style . "stroustrup")
  (fill-column . 78)
  (eval . (progn
-	    (c-set-offset 'case-label '0)
 	    (c-set-offset 'innamespace '0)
 	    (c-set-offset 'inline-open '0)))
  )
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,101 +0,0 @@
-language: c
-
-sudo: false
-
-cache:
-  directories:
-    - $HOME/.ccache
-
-addons:
-  apt:
-    packages:
-      - libdrm-dev
-      - libudev-dev
-      - x11proto-xf86vidmode-dev
-      - libexpat1-dev
-      - libxcb-dri2-0-dev
-      - libx11-xcb-dev
-      - llvm-3.4-dev
-      - scons
-
-env:
-  global:
-    - XORG_RELEASES=http://xorg.freedesktop.org/releases/individual
-    - XCB_RELEASES=http://xcb.freedesktop.org/dist
-    - XORGMACROS_VERSION=util-macros-1.19.0
-    - GLPROTO_VERSION=glproto-1.4.17
-    - DRI2PROTO_VERSION=dri2proto-2.8
-    - DRI3PROTO_VERSION=dri3proto-1.0
-    - PRESENTPROTO_VERSION=presentproto-1.0
-    - LIBPCIACCESS_VERSION=libpciaccess-0.13.4
-    - LIBDRM_VERSION=libdrm-2.4.65
-    - XCBPROTO_VERSION=xcb-proto-1.11
-    - LIBXCB_VERSION=libxcb-1.11
-    - LIBXSHMFENCE_VERSION=libxshmfence-1.2
-    - PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig
-  matrix:
-    - BUILD=make
-    - BUILD=scons
-
-install:
-  - export PATH="/usr/lib/ccache:$PATH"
-  - pip install --user mako
-
-  # Install dependencies where we require specific versions (or where
-  # disallowed by Travis CI's package whitelisting).
-
-  - wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2
-  - tar -jxvf $XORGMACROS_VERSION.tar.bz2
-  - (cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2
-  - tar -jxvf $GLPROTO_VERSION.tar.bz2
-  - (cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2
-  - tar -jxvf $DRI2PROTO_VERSION.tar.bz2
-  - (cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XORG_RELEASES/proto/$DRI3PROTO_VERSION.tar.bz2
-  - tar -jxvf $DRI3PROTO_VERSION.tar.bz2
-  - (cd $DRI3PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XORG_RELEASES/proto/$PRESENTPROTO_VERSION.tar.bz2
-  - tar -jxvf $PRESENTPROTO_VERSION.tar.bz2
-  - (cd $PRESENTPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2
-  - tar -jxvf $XCBPROTO_VERSION.tar.bz2
-  - (cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2
-  - tar -jxvf $LIBXCB_VERSION.tar.bz2
-  - (cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2
-  - tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2
-  - (cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
-  - tar -jxvf $LIBDRM_VERSION.tar.bz2
-  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-  - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
-  - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
-  - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-# Disabled LLVM (and therefore r300 and r600) because the build fails
-# with "undefined reference to `clock_gettime'" and "undefined
-# reference to `setupterm'" in llvmpipe.
-script:
-  - if test "x$BUILD" = xmake; then
-      ./autogen.sh --enable-debug
-        --disable-gallium-llvm
-        --with-egl-platforms=x11,drm
-        --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
-        --with-gallium-drivers=svga,swrast,vc4,virgl
-        ;
-      make && make check;
-    elif test x$BUILD = xscons; then
-      scons;
-    fi
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -21,8 +21,13 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 # DEALINGS IN THE SOFTWARE.

+# use c99 compiler by default
+ifeq ($(LOCAL_CC),)
 ifeq ($(LOCAL_IS_HOST_MODULE),true)
-LOCAL_CFLAGS += -D_GNU_SOURCE
+LOCAL_CC := $(HOST_CC) -std=c99 -D_GNU_SOURCE
+else
+LOCAL_CC := $(TARGET_CC) -std=c99
+endif
 endif

 LOCAL_C_INCLUDES += \
@@ -32,7 +37,6 @@ LOCAL_C_INCLUDES += \
 MESA_VERSION := $(shell cat $(MESA_TOP)/VERSION)
 # define ANDROID_VERSION (e.g., 4.0.x => 0x0400)
 LOCAL_CFLAGS += \
-	-Wno-unused-parameter \
 	-DPACKAGE_VERSION=\"$(MESA_VERSION)\" \
 	-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
@@ -56,10 +60,6 @@ LOCAL_CFLAGS += \
 	-fvisibility=hidden \
 	-Wno-sign-compare

-# mesa requires at least c99 compiler
-LOCAL_CONLYFLAGS += \
-	-std=c99
-
 ifeq ($(strip $(MESA_ENABLE_ASM)),true)
 ifeq ($(TARGET_ARCH),x86)
 LOCAL_CFLAGS += \
--- a/Android.mk
+++ b/Android.mk
@@ -24,7 +24,7 @@
 # BOARD_GPU_DRIVERS should be defined.  The valid values are
 #
 #   classic drivers: i915 i965
-#   gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vc4 virgl vmwgfx
+#   gallium drivers: swrast freedreno i915g ilo nouveau r300g r600g radeonsi vc4 vmwgfx
 #
 # The main target is libGLES_mesa.  For each classic driver enabled, a DRI
 # module will also be built.  DRI modules will be loaded by libGLES_mesa.
@@ -46,7 +46,7 @@ MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
 MESA_PYTHON2 := python

 classic_drivers := i915 i965
-gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4 virgl
+gallium_drivers := swrast freedreno i915g ilo nouveau r300g r600g radeonsi vmwgfx vc4

 MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS))

@@ -86,7 +86,6 @@ ifneq ($(strip $(MESA_GPU_DRIVERS)),)
 SUBDIRS := \
 	src/loader \
 	src/mapi \
-	src/compiler \
 	src/glsl \
 	src/mesa \
 	src/util \
--- a/Makefile.am
+++ b/Makefile.am
@@ -51,6 +51,7 @@ noinst_HEADERS = \
 	include/c99_alloca.h \
 	include/c99_compat.h \
 	include/c99_math.h \
+	include/c99 \
 	include/c11 \
 	include/D3D9 \
 	include/HaikuGL \
--- a/2
+++ b/2
@@ -1 +1 @@
-11.2.0-devel
+11.1.0
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,73 +0,0 @@
-# http://www.appveyor.com/docs/appveyor-yml
-#
-# To setup AppVeyor for your own personal repositories do the following:
-# - Sign up
-# - Add a new project
-# - Select Git and fill in the Git clone URL
-# - Setup a Git hook as explained in
-#   https://github.com/appveyor/webhooks#installing-git-hook
-# - Check 'Settings > General > Skip branches without appveyor.yml'
-# - Check 'Settings > General > Rolling builds'
-# - Setup the global or project notifications to your liking
-#
-# Note that kicking (or restarting) a build via the web UI will not work, as it
-# will fail to find appveyor.yml .  The Git hook is the most practical way to
-# kick a build.
-#
-# See also:
-# - http://help.appveyor.com/discussions/problems/2209-node-grunt-build-specify-a-project-or-solution-file-the-directory-does-not-contain-a-project-or-solution-file
-# - http://help.appveyor.com/discussions/questions/1184-build-config-vs-appveyoryaml
-
-version: '{build}'
-
-branches:
-  except:
-  - /^travis.*$/
-
-# Don't download the full Mesa history to speed up cloning.  However the clone
-# depth must not be too small, otherwise builds might fail when lots of patches
-# are committed in succession, because the desired commit is not found on the
-# truncated history.
-#
-# See also:
-# - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
-clone_depth: 100
-
-cache:
- win_flex_bison-2.4.5.zip
- llvm-3.3.1-msvc2013-mtd.7z
-
-environment:
-  WINFLEXBISON_ARCHIVE: win_flex_bison-2.4.5.zip
-  LLVM_ARCHIVE: llvm-3.3.1-msvc2013-mtd.7z
-
-install:
-# Check pip
- python --version
- python -m pip --version
-# Install Mako
- python -m pip install --egg Mako
-# Install SCons
- python -m pip install --egg scons==2.4.1
- scons --version
-# Install flex/bison
- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "http://downloads.sourceforge.net/project/winflexbison/%WINFLEXBISON_ARCHIVE%"
- 7z x -y -owinflexbison\ "%WINFLEXBISON_ARCHIVE%" > nul
- set Path=%CD%\winflexbison;%Path%
- win_flex --version
- win_bison --version
-# Download and extract LLVM
- if not exist "%LLVM_ARCHIVE%" appveyor DownloadFile "https://people.freedesktop.org/~jrfonseca/llvm/%LLVM_ARCHIVE%"
- 7z x -y "%LLVM_ARCHIVE%" > nul
- mkdir llvm\bin
- set LLVM=%CD%\llvm
-
-build_script:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=12.0 llvm=1
-
-
-# It's possible to setup notification here, as described in
-# http://www.appveyor.com/docs/notifications#appveyor-yml-configuration , but
-# doing so would cause the notification settings to be replicated across all
-# repos, which is most likely undesired.  So it's better to rely on the
-# Appveyor global/project notification settings.
--- a/configure.ac
+++ b/configure.ac
@@ -72,9 +72,9 @@ LIBDRM_REQUIRED=2.4.60
 LIBDRM_RADEON_REQUIRED=2.4.56
 LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
-LIBDRM_NVVIEUX_REQUIRED=2.4.66
-LIBDRM_NOUVEAU_REQUIRED=2.4.66
-LIBDRM_FREEDRENO_REQUIRED=2.4.67
+LIBDRM_NVVIEUX_REQUIRED=2.4.33
+LIBDRM_NOUVEAU_REQUIRED=2.4.62
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
@@ -98,7 +98,7 @@ AC_PROG_CXX
 AM_PROG_CC_C_O
 AM_PROG_AS
 AX_CHECK_GNU_MAKE
-AC_CHECK_PROGS([PYTHON2], [python2.7 python2 python])
+AC_CHECK_PROGS([PYTHON2], [python2 python])
 AC_PROG_SED
 AC_PROG_MKDIR_P

@@ -197,13 +197,6 @@ if test "x$GCC" = xyes -a "x$acv_mesa_CLANG" = xno; then
    fi
 fi

-dnl We don't support building Mesa with Sun C compiler
-dnl https://bugs.freedesktop.org/show_bug.cgi?id=93189
-AC_CHECK_DECL([__SUNPRO_C], [SUNCC=yes], [SUNCC=no])
-if test "x$SUNCC" = xyes; then
-    AC_MSG_ERROR([Building with Sun C compiler is not supported, use GCC instead.])
-fi
-
 dnl Check for compiler builtins
 AX_GCC_BUILTIN([__builtin_bswap32])
 AX_GCC_BUILTIN([__builtin_bswap64])
@@ -245,7 +238,7 @@ _SAVE_LDFLAGS="$LDFLAGS"
 _SAVE_CPPFLAGS="$CPPFLAGS"

 dnl Compiler macros
-DEFINES="-D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS"
+DEFINES="-D__STDC_LIMIT_MACROS"
 AC_SUBST([DEFINES])
 case "$host_os" in
 linux*|*-gnu*|gnu*)
@@ -305,7 +298,8 @@ if test "x$GCC" = xyes; then

    # Flags to help ensure that certain portions of the code -- and only those
    # portions -- can be built with MSVC:
-    # - src/util, src/gallium/auxiliary, rc/gallium/drivers/llvmpipe, and
+    # - src/util, src/gallium/auxiliary, and src/gallium/drivers/llvmpipe need
+    #   to build with Windows SDK 7.0.7600, which bundles MSVC 2008
    # - non-Linux/Posix OpenGL portions needs to build on MSVC 2013 (which
    #   supports most of C99)
    # - the rest has no compiler compiler restrictions
@@ -322,6 +316,9 @@ if test "x$GCC" = xyes; then
 		    AC_MSG_RESULT([yes])],
 		    AC_MSG_RESULT([no]));
    CFLAGS="$save_CFLAGS"
+
+    MSVC2008_COMPAT_CFLAGS="$MSVC2013_COMPAT_CFLAGS -Werror=declaration-after-statement"
+    MSVC2008_COMPAT_CXXFLAGS="$MSVC2013_COMPAT_CXXFLAGS"
 fi
 if test "x$GXX" = xyes; then
    CXXFLAGS="$CXXFLAGS -Wall"
@@ -349,6 +346,8 @@ fi

 AC_SUBST([MSVC2013_COMPAT_CFLAGS])
 AC_SUBST([MSVC2013_COMPAT_CXXFLAGS])
+AC_SUBST([MSVC2008_COMPAT_CFLAGS])
+AC_SUBST([MSVC2008_COMPAT_CXXFLAGS])

 dnl even if the compiler appears to support it, using visibility attributes isn't
 dnl going to do anything useful currently on cygwin apart from emit lots of warnings
@@ -377,11 +376,10 @@ save_CFLAGS="$CFLAGS"
 CFLAGS="$SSE41_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #include <smmintrin.h>
-int param;
 int main () {
-    __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
+    __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
    c = _mm_max_epu32(a, b);
-    return _mm_cvtsi128_si32(c);
+    return 0;
 }]])], SSE41_SUPPORTED=1)
 CFLAGS="$save_CFLAGS"
 if test "x$SSE41_SUPPORTED" = x1; then
@@ -390,61 +388,6 @@ fi
 AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
 AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)

-dnl Check for Endianness
-AC_C_BIGENDIAN(
-   little_endian=no,
-   little_endian=yes,
-   little_endian=no,
-   little_endian=no
-)
-
-dnl Check for POWER8 Architecture
-PWR8_CFLAGS="-mpower8-vector"
-have_pwr8_intrinsics=no
-AC_MSG_CHECKING(whether gcc supports -mpower8-vector)
-save_CFLAGS=$CFLAGS
-CFLAGS="$PWR8_CFLAGS $CFLAGS"
-AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
-#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8))
-#error "Need GCC >= 4.8 for sane POWER8 support"
-#endif
-#include <altivec.h>
-int main () {
-    vector unsigned char r;
-    vector unsigned int v = vec_splat_u32 (1);
-    r = __builtin_vec_vgbbd ((vector unsigned char) v);
-    return 0;
-}]])], have_pwr8_intrinsics=yes)
-CFLAGS=$save_CFLAGS
-
-AC_ARG_ENABLE(pwr8,
-   [AC_HELP_STRING([--disable-pwr8-inst],
-                   [disable POWER8-specific instructions])],
-   [enable_pwr8=$enableval], [enable_pwr8=auto])
-
-if test "x$enable_pwr8" = xno ; then
-   have_pwr8_intrinsics=disabled
-fi
-
-if test $have_pwr8_intrinsics = yes && test $little_endian = yes ; then
-   DEFINES="$DEFINES -D_ARCH_PWR8"
-   CXXFLAGS="$CXXFLAGS $PWR8_CFLAGS"
-   CFLAGS="$CFLAGS $PWR8_CFLAGS"
-else
-   PWR8_CFLAGS=
-fi
-
-AC_MSG_RESULT($have_pwr8_intrinsics)
-if test "x$enable_pwr8" = xyes && test $have_pwr8_intrinsics = no ; then
-   AC_MSG_ERROR([POWER8 compiler support not detected])
-fi
-
-if test $have_pwr8_intrinsics = yes && test $little_endian = no ; then
-   AC_MSG_WARN([POWER8 optimization is enabled only on POWER8 Little-Endian])
-fi
-
-AC_SUBST([PWR8_CFLAGS], $PWR8_CFLAGS)
-
 dnl Can't have static and shared libraries, default to static if user
 dnl explicitly requested. If both disabled, set to static since shared
 dnl was explicitly requested.
@@ -470,29 +413,8 @@ AC_ARG_ENABLE([debug],
    [enable_debug="$enableval"],
    [enable_debug=no]
 )
-
-AC_ARG_ENABLE([profile],
-    [AS_HELP_STRING([--enable-profile],
-        [enable profiling of code @<:@default=disabled@:>@])],
-    [enable_profile="$enableval"],
-    [enable_profile=no]
-)
-
-if test "x$enable_profile" = xyes; then
-    DEFINES="$DEFINES -DPROFILE"
-    if test "x$GCC" = xyes; then
-        CFLAGS="$CFLAGS -fno-omit-frame-pointer"
-    fi
-    if test "x$GXX" = xyes; then
-        CXXFLAGS="$CXXFLAGS -fno-omit-frame-pointer"
-    fi
-fi
-
 if test "x$enable_debug" = xyes; then
    DEFINES="$DEFINES -DDEBUG"
-    if test "x$enable_profile" = xyes; then
-        AC_MSG_WARN([Debug and Profile are enabled at the same time])
-    fi
    if test "x$GCC" = xyes; then
        if ! echo "$CFLAGS" | grep -q -e '-g'; then
            CFLAGS="$CFLAGS -g"
@@ -1783,15 +1705,7 @@ AC_ARG_WITH([clang-libdir],
   [CLANG_LIBDIR=''])

 PKG_CHECK_EXISTS([libclc], [have_libclc=yes], [have_libclc=no])
-PKG_CHECK_MODULES([LIBELF], [libelf], [have_libelf=yes], [have_libelf=no])
-
-if test "x$have_libelf" = xno; then
-   LIBELF_LIBS=''
-   LIBELF_CFLAGS=''
-   AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;LIBELF_LIBS=-lelf], [have_libelf=no])
-   AC_SUBST([LIBELF_LIBS])
-   AC_SUBST([LIBELF_CFLAGS])
-fi
+AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;ELF_LIB=-lelf])

 if test "x$enable_opencl" = xyes; then
    if test -z "$with_gallium_drivers"; then
@@ -2153,12 +2067,7 @@ gallium_require_drm_loader() {
    fi
 }

-dnl This is for Glamor. Skip this if OpenGL is disabled.
 require_egl_drm() {
-    if test "x$enable_opengl" = xno; then
-        return 0
-    fi
-
    case "$with_egl_platforms" in
        *drm*)
            ;;
@@ -2180,7 +2089,7 @@ radeon_llvm_check() {
    if test "x$enable_gallium_llvm" != "xyes"; then
        AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
    fi
-    llvm_check_version_for "3" "6" "0" $1
+    llvm_check_version_for "3" "5" "0" $1
    if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
        AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
    fi
@@ -2295,9 +2204,6 @@ dnl in LLVM_LIBS.

 if test "x$MESA_LLVM" != x0; then

-    if ! $LLVM_CONFIG --libs ${LLVM_COMPONENTS} >/dev/null; then
-       AC_MSG_ERROR([Calling ${LLVM_CONFIG} failed])
-    fi
    LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"

    dnl llvm-config may not give the right answer when llvm is a built as a
@@ -2386,6 +2292,8 @@ if test "x$USE_VC4_SIMULATOR" = xyes -a "x$HAVE_GALLIUM_ILO" = xyes; then
    AC_MSG_ERROR([VC4 simulator on x86 replaces i965 driver build, so ilo must be disabled.])
 fi

+AC_SUBST([ELF_LIB])
+
 AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
 AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
 AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
@@ -2438,7 +2346,6 @@ CXXFLAGS="$CXXFLAGS $USER_CXXFLAGS"
 dnl Substitute the config
 AC_CONFIG_FILES([Makefile
 		src/Makefile
-		src/compiler/Makefile
 		src/egl/Makefile
 		src/egl/main/egl.pc
 		src/egl/wayland/wayland-drm/Makefile
@@ -2508,6 +2415,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/winsys/virgl/vtest/Makefile
 		src/gbm/Makefile
 		src/gbm/main/gbm.pc
+		src/glsl/Makefile
 		src/glx/Makefile
 		src/glx/apple/Makefile
 		src/glx/tests/Makefile
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -92,11 +92,11 @@ GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
  GL_ARB_vertex_type_2_10_10_10_rev                     DONE ()


-GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
+GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi

-  GL_ARB_draw_buffers_blend                            DONE (i965, nv50, llvmpipe, softpipe)
-  GL_ARB_draw_indirect                                 DONE (i965, llvmpipe, softpipe)
-  GL_ARB_gpu_shader5                                   DONE (i965)
+  GL_ARB_draw_buffers_blend                            DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_draw_indirect                                 DONE (i965, r600, llvmpipe, softpipe)
+  GL_ARB_gpu_shader5                                   DONE (i965, r600)
  - 'precise' qualifier                                DONE
  - Dynamically uniform sampler array indices          DONE (softpipe)
  - Dynamically uniform UBO array indices              DONE ()
@@ -109,33 +109,33 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, r600, radeonsi
  - Enhanced per-sample shading                        DONE ()
  - Interpolation functions                            DONE ()
  - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
-  GL_ARB_sample_shading                                DONE (i965, nv50)
-  GL_ARB_shader_subroutine                             DONE (i965, nv50, llvmpipe, softpipe)
-  GL_ARB_tessellation_shader                           DONE (i965)
-  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, llvmpipe, softpipe)
-  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, llvmpipe, softpipe)
-  GL_ARB_texture_gather                                DONE (i965, nv50, llvmpipe, softpipe)
-  GL_ARB_texture_query_lod                             DONE (i965, nv50, softpipe)
-  GL_ARB_transform_feedback2                           DONE (i965, nv50, llvmpipe, softpipe)
-  GL_ARB_transform_feedback3                           DONE (i965, nv50, llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (r600, llvmpipe, softpipe)
+  GL_ARB_sample_shading                                DONE (i965, nv50, r600)
+  GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_tessellation_shader                           DONE ()
+  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, r600, llvmpipe, softpipe)
+  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_texture_gather                                DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600, softpipe)
+  GL_ARB_transform_feedback2                           DONE (i965, nv50, r600, llvmpipe, softpipe)
+  GL_ARB_transform_feedback3                           DONE (i965, nv50, r600, llvmpipe, softpipe)


-GL 4.1, GLSL 4.10 --- all DONE: nvc0, r600, radeonsi
+GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi

-  GL_ARB_ES2_compatibility                             DONE (i965, nv50, llvmpipe, softpipe)
+  GL_ARB_ES2_compatibility                             DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_get_program_binary                            DONE (0 binary formats)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
-  GL_ARB_viewport_array                                DONE (i965, nv50, llvmpipe, softpipe)
+  GL_ARB_vertex_attrib_64bit                           DONE (r600, llvmpipe, softpipe)
+  GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)


 GL 4.2, GLSL 4.20:

  GL_ARB_texture_compression_bptc                      DONE (i965, nvc0, r600, radeonsi)
  GL_ARB_compressed_texture_pixel_storage              DONE (all drivers)
-  GL_ARB_shader_atomic_counters                        DONE (i965, nvc0)
+  GL_ARB_shader_atomic_counters                        DONE (i965)
  GL_ARB_texture_storage                               DONE (all drivers)
  GL_ARB_transform_feedback_instanced                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_base_instance                                 DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -149,22 +149,22 @@ GL 4.2, GLSL 4.20:

 GL 4.3, GLSL 4.30:

-  GL_ARB_arrays_of_arrays                              DONE (all drivers that support GLSL 1.30)
+  GL_ARB_arrays_of_arrays                              DONE (i965)
  GL_ARB_ES3_compatibility                             DONE (all drivers that support GLSL 3.30)
  GL_ARB_clear_buffer_object                           DONE (all drivers)
-  GL_ARB_compute_shader                                DONE (i965)
-  GL_ARB_copy_image                                    DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_ARB_compute_shader                                in progress (jljusten)
+  GL_ARB_copy_image                                    DONE (i965, nv50, nvc0, radeonsi)
  GL_KHR_debug                                         DONE (all drivers)
  GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
  GL_ARB_fragment_layer_viewport                       DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
  GL_ARB_framebuffer_no_attachments                    DONE (i965)
-  GL_ARB_internalformat_query2                         in progress (elima)
+  GL_ARB_internalformat_query2                         not started
  GL_ARB_invalidate_subdata                            DONE (all drivers)
  GL_ARB_multi_draw_indirect                           DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_program_interface_query                       DONE (all drivers)
  GL_ARB_robust_buffer_access_behavior                 not started
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  DONE (i965, nvc0)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
  GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
@@ -184,9 +184,9 @@ GL 4.4, GLSL 4.40:
  - forced alignment within blocks                     in progress
  - specified vec4-slot component numbers              in progress
  - specified transform/feedback layout                in progress
-  - input/output block locations                       DONE
+  - input/output block locations                       in progress
  GL_ARB_multi_bind                                    DONE (all drivers)
-  GL_ARB_query_buffer_object                           DONE (nvc0)
+  GL_ARB_query_buffer_object                           not started
  GL_ARB_texture_mirror_clamp_to_edge                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_texture_stencil8                              DONE (nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_vertex_type_10f_11f_11f_rev                   DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -209,16 +209,16 @@ GL 4.5, GLSL 4.50:

 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
-  GL_ARB_arrays_of_arrays                              DONE (all drivers that support GLSL 1.30)
-  GL_ARB_compute_shader                                DONE (i965)
+  GL_ARB_arrays_of_arrays                              DONE (i965)
+  GL_ARB_compute_shader                                in progress (jljusten)
  GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
  GL_ARB_framebuffer_no_attachments                    DONE (i965)
  GL_ARB_program_interface_query                       DONE (all drivers)
-  GL_ARB_shader_atomic_counters                        DONE (i965, nvc0)
+  GL_ARB_shader_atomic_counters                        DONE (i965)
  GL_ARB_shader_image_load_store                       DONE (i965)
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  DONE (i965, nvc0)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_shading_language_packing                      DONE (all drivers)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -233,7 +233,7 @@ GLES3.1, GLSL ES 3.1
      glMemoryBarrierByRegion                          DONE
      glGetTexLevelParameter[fi]v - needs updates      DONE
      glGetBooleani_v - restrict to GLES enums
-      gl_HelperInvocation support                      DONE (i965, nvc0, r600)
+      gl_HelperInvocation support

 GLES3.2, GLSL ES 3.2
  GL_EXT_color_buffer_float                            DONE (all drivers)
@@ -244,7 +244,7 @@ GLES3.2, GLSL ES 3.2
  GL_OES_copy_image                                    not started (based on GL_ARB_copy_image, which is done for some drivers)
  GL_OES_draw_buffers_indexed                          not started
  GL_OES_draw_elements_base_vertex                     DONE (all drivers)
-  GL_OES_geometry_shader                               started (Marta)
+  GL_OES_geometry_shader                               not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
  GL_OES_gpu_shader5                                   not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
  GL_OES_primitive_bounding box                        not started
  GL_OES_sample_shading                                not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
@@ -256,7 +256,7 @@ GLES3.2, GLSL ES 3.2
  GL_OES_texture_border_clamp                          not started (based on GL_ARB_texture_border_clamp, which is done)
  GL_OES_texture_buffer                                not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
  GL_OES_texture_cube_map_array                        not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
-  GL_OES_texture_stencil8                              DONE (all drivers that support GL_ARB_texture_stencil8)
+  GL_OES_texture_stencil8                              not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
  GL_OES_texture_storage_multisample_2d_array          DONE (all drivers that support GL_ARB_texture_multisample)

 More info about these features and the work involved can be found at
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -90,14 +90,14 @@
 <li><a href="http://www.opengl.org" target="_parent">OpenGL website</a>
 <li><a href="http://dri.freedesktop.org" target="_parent">DRI website</a>
 <li><a href="http://www.freedesktop.org" target="_parent">freedesktop.org</a>
-<li><a href="http://planet.freedesktop.org" target="_parent">Developer blogs</a>
 </ul>

 <b>Hosted by:</b>
 <br>
 <blockquote>
 <a href="http://sourceforge.net"
-target="_parent">sourceforge.net</a>
+target="_parent"><img src="http://sourceforge.net/sflogo.php?group_id=3&amp;type=1"
+width="88" height="31" align="bottom" alt="Sourceforge.net" border="0"></a>
 </blockquote>

 </body>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -91,20 +91,11 @@ This is only valid for versions &gt;= 3.0.
 <li> Mesa may not really implement all the features of the given version.
 (for developers only)
 </ul>
-<li>MESA_GLES_VERSION_OVERRIDE - changes the value returned by
-glGetString(GL_VERSION) for OpenGL ES.
-<ul>
-<li> The format should be MAJOR.MINOR
-<li> Examples: 2.0, 3.0, 3.1
-<li> Mesa may not really implement all the features of the given version.
-(for developers only)
-</ul>
 <li>MESA_GLSL_VERSION_OVERRIDE - changes the value returned by
 glGetString(GL_SHADING_LANGUAGE_VERSION). Valid values are integers, such as
 "130".  Mesa will not really implement all the features of the given language version
 if it's higher than what's normally reported. (for developers only)
 <li>MESA_GLSL - <a href="shading.html#envvars">shading language compiler options</a>
-<li>MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
 </ul>


@@ -232,7 +223,7 @@ See src/mesa/state_tracker/st_debug.c for other options.
 <li>LP_PERF - a comma-separated list of options to selectively no-op various
    parts of the driver.  See the source code for details.
 <li>LP_NUM_THREADS - an integer indicating how many threads to use for rendering.
-    Zero turns off threading completely.  The default value is the number of CPU
+    Zero turns off threading completely.  The default value is the number of CPU
    cores present.
 </ul>

@@ -253,25 +244,6 @@ for details.
 </ul>


-<h3>VC4 driver environment variables</h3>
-<ul>
-<li>VC4_DEBUG - a comma-separated list of named flags, which do various things:
-<ul>
-   <li>cl - dump command list during creation</li>
-   <li>qpu - dump generated QPU instructions</li>
-   <li>qir - dump QPU IR during program compile</li>
-   <li>nir - dump NIR during program compile</li>
-   <li>tgsi - dump TGSI during program compile</li>
-   <li>shaderdb - dump program compile information for shader-db analysis</li>
-   <li>perf - print during performance-related events</li>
-   <li>norast - skip actual hardware execution of commands</li>
-   <li>always_flush - flush after each draw call</li>
-   <li>always_sync - wait for finish after each flush</li>
-   <li>dump - write a GPU command stream trace file (VC4 simulator only)</li>
-</ul>
-</ul>
-
-
 <p>
 Other Gallium drivers have their own environment variables.  These may change
 frequently so the source code should be consulted for details.
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,52 +16,6 @@

 <h1>News</h1>

-<h2>February 10, 2016</h2>
-<p>
-<a href="relnotes/11.1.2.html">Mesa 11.1.2</a> is released.
-This is a bug-fix release.
-</p>
-
-<h2>January 22, 2016</h2>
-<p>
-<a href="relnotes/11.0.9.html">Mesa 11.0.9</a> is released.
-This is a bug-fix release.
-<br>
-NOTE: It is anticipated that 11.0.9 will be the final release in the 11.0
-series. Users of 11.0 are encouraged to migrate to the 11.1 series in order
-to obtain future fixes.
-</p>
-
-<h2>January 13, 2016</h2>
-<p>
-<a href="relnotes/11.1.1.html">Mesa 11.1.1</a> is released.
-This is a bug-fix release.
-</p>
-
-<h2>December 21, 2015</h2>
-<p>
-<a href="relnotes/11.0.8.html">Mesa 11.0.8</a> is released.
-This is a bug-fix release.
-</p>
-
-<h2>December 15, 2015</h2>
-<p>
-<a href="relnotes/11.1.0.html">Mesa 11.1.0</a> is released.  This is a new
-development release.  See the release notes for more information about
-the release.
-</p>
-
-<h2>December 9, 2015</h2>
-<p>
-<a href="relnotes/11.0.7.html">Mesa 11.0.7</a> is released.
-This is a bug-fix release.
-</p>
-<p>
-Mesa demos 8.3.0 is also released.
-See the <a href="http://lists.freedesktop.org/archives/mesa-announce/2015-December/000191.html">announcement</a> for more information about the release.
-You can download it from <a href="ftp://ftp.freedesktop.org/pub/mesa/demos/8.3.0/">ftp.freedesktop.org/pub/mesa/demos/8.3.0/</a>.
-</p>
-
 <h2>November 21, 2015</h2>
 <p>
 <a href="relnotes/11.0.6.html">Mesa 11.0.6</a> is released.
--- a/docs/install.html
+++ b/docs/install.html
@@ -39,7 +39,7 @@ Version 2.6.4 or later should work.
 </li>
 <br>
 <li><a href="http://www.makotemplates.org/">Python Mako module</a> -
-Python Mako module is required. Version 0.3.4 or later should work.
+Python Mako module is required. Version 0.7.3 or later should work.
 </li>
 <br>
 <li><a href="http://www.scons.org/">SCons</a> is required for building on
@@ -58,9 +58,6 @@ On Windows with MinGW, install flex and bison with:
 For MSVC on Windows, install
 <a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>.
 </li>
-<br>
-<li>For building on Windows, Microsoft Visual Studio 2013 or later is required.
-</li>
 </ul>


--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,12 +21,6 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
-<li><a href="relnotes/11.1.2.html">11.1.2 release notes</a>
-<li><a href="relnotes/11.0.9.html">11.0.9 release notes</a>
-<li><a href="relnotes/11.1.1.html">11.1.1 release notes</a>
-<li><a href="relnotes/11.0.8.html">11.0.8 release notes</a>
-<li><a href="relnotes/11.1.0.html">11.1.0 release notes</a>
-<li><a href="relnotes/11.0.7.html">11.0.7 release notes</a>
 <li><a href="relnotes/11.0.6.html">11.0.6 release notes</a>
 <li><a href="relnotes/11.0.5.html">11.0.5 release notes</a>
 <li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
--- a/docs/relnotes/11.0.5.html
+++ b/docs/relnotes/11.0.5.html
@@ -45,6 +45,8 @@ because compatibility contexts are not supported.

 <ul>

+<ul>
+
 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91993">Bug 91993</a> - Graphical glitch in Astromenace (open-source game).</li>

 <li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92214">Bug 92214</a> - Flightgear crashes during splashboot with R600 driver, LLVM 3.7.0 and mesa 11.0.2</li>
--- a/docs/relnotes/11.0.7.html
+++ b/docs/relnotes/11.0.7.html
@@ -1,154 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.0.7 Release Notes / December 9, 2015</h1>
-
-<p>
-Mesa 11.0.7 is a bug fix release which fixes bugs found since the 11.0.6 release.
-</p>
-<p>
-Mesa 11.0.7 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-07c27004ff68b288097d17b2faa7bdf15ec73c96b7e6c9835266e544adf0a62f  mesa-11.0.7.tar.gz
-e7e90a332ede6c8fd08eff90786a3fd1605a4e62ebf3a9b514047838194538cb  mesa-11.0.7.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90348">Bug 90348</a> - Spilling failure of b96 merged value</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92363">Bug 92363</a> - [BSW/BDW] ogles1conform Gets test fails</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92438">Bug 92438</a> - Segfault in pushbuf_kref when running the android emulator (qemu) on nv50</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93110">Bug 93110</a> - [NVE4] textureSize() and textureQueryLevels() uses a texture bound during the previous draw call</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93126">Bug 93126</a> - wrongly claim supporting GL_EXT_texture_rg</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Chris Wilson (1):</p>
-<ul>
-  <li>meta: Compute correct buffer size with SkipRows/SkipPixels</li>
-</ul>
-
-<p>Daniel Stone (1):</p>
-<ul>
-  <li>egl/wayland: Ignore rects from SwapBuffersWithDamage</li>
-</ul>
-
-<p>Dave Airlie (4):</p>
-<ul>
-  <li>texgetimage: consolidate 1D array handling code.</li>
-  <li>r600: geometry shader gsvs itemsize workaround</li>
-  <li>r600: rv670 use at least 16es/gs threads</li>
-  <li>r600: workaround empty geom shader.</li>
-</ul>
-
-<p>Emil Velikov (4):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.6</li>
-  <li>get-pick-list.sh: Require explicit "11.0" for nominating stable patches</li>
-  <li>mesa; add get-extra-pick-list.sh script into bin/</li>
-  <li>Update version to 11.0.7</li>
-</ul>
-
-<p>François Tigeot (1):</p>
-<ul>
-  <li>xmlconfig: Add support for DragonFly</li>
-</ul>
-
-<p>Ian Romanick (22):</p>
-<ul>
-  <li>mesa: Make bind_vertex_buffer avilable outside varray.c</li>
-  <li>mesa: Refactor update_array_format to make _mesa_update_array_format_public</li>
-  <li>mesa: Refactor enable_vertex_array_attrib to make _mesa_enable_vertex_array_attrib</li>
-  <li>i965: Pass brw_context instead of gl_context to brw_draw_rectlist</li>
-  <li>i965: Use DSA functions for VBOs in brw_meta_fast_clear</li>
-  <li>i965: Use internal functions for buffer object access</li>
-  <li>i965: Don't pollute the buffer object namespace in brw_meta_fast_clear</li>
-  <li>meta: Use DSA functions for PBO in create_texture_for_pbo</li>
-  <li>meta: Use _mesa_NamedBufferData and _mesa_NamedBufferSubData for users of _mesa_meta_setup_vertex_objects</li>
-  <li>i965: Use _mesa_NamedBufferSubData for users of _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Don't leave the VBO bound after _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Track VBO using gl_buffer_object instead of GL API object handle</li>
-  <li>meta: Use DSA functions for VBOs in _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Use internal functions for buffer object and VAO access</li>
-  <li>meta: Don't pollute the buffer object namespace in _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Partially convert _mesa_meta_DrawTex to DSA</li>
-  <li>meta: Track VBO using gl_buffer_object instead of GL API object handle in _mesa_meta_DrawTex</li>
-  <li>meta: Use internal functions for buffer object and VAO access in _mesa_meta_DrawTex</li>
-  <li>meta: Don't pollute the buffer object namespace in _mesa_meta_DrawTex</li>
-  <li>meta/TexSubImage: Don't pollute the buffer object namespace</li>
-  <li>meta/generate_mipmap: Don't leak the framebuffer object</li>
-  <li>glsl: Fix off-by-one error in array size check assertion</li>
-</ul>
-
-<p>Ilia Mirkin (7):</p>
-<ul>
-  <li>nvc0/ir: actually emit AFETCH on kepler</li>
-  <li>nir: fix typo in idiv lowering, causing large-udiv-udiv failures</li>
-  <li>nouveau: use the buffer usage to determine placement when no binding</li>
-  <li>nv50,nvc0: properly handle buffer storage invalidation on dsa buffer</li>
-  <li>nv50/ir: fix (un)spilling of 3-wide results</li>
-  <li>mesa: support GL_RED/GL_RG in ES2 contexts when driver support exists</li>
-  <li>nvc0/ir: start offset at texBindBase for txq, like regular texturing</li>
-</ul>
-
-<p>Jonathan Gray (1):</p>
-<ul>
-  <li>automake: fix some occurrences of hardcoded -ldl and -lpthread</li>
-</ul>
-
-<p>Leo Liu (1):</p>
-<ul>
-  <li>radeon/vce: disable Stoney VCE for 11.0</li>
-</ul>
-
-<p>Marta Lofstedt (1):</p>
-<ul>
-  <li>gles2: Update gl2ext.h to revision: 32120</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>llvmpipe: disable VSX in ppc due to LLVM PPC bug</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.0.8.html
+++ b/docs/relnotes/11.0.8.html
@@ -1,200 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.0.8 Release Notes / December 21, 2015</h1>
-
-<p>
-Mesa 11.0.8 is a bug fix release which fixes bugs found since the 11.0.7 release.
-</p>
-<p>
-Mesa 11.0.8 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-ab9db87b54d7525e4b611b82577ea9a9eae55927558df57b190059d5ecd9406f  mesa-11.0.8.tar.gz
-5696e4730518b6805d2ed5def393c4293f425a2c2c01bd5ed4bdd7ad62f7ad75  mesa-11.0.8.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Boyuan Zhang (1):</p>
-<ul>
-  <li>radeon/uvd: uv pitch separation for stoney</li>
-</ul>
-
-<p>Dave Airlie (9):</p>
-<ul>
-  <li>r600: do SQ flush ES ring rolling workaround</li>
-  <li>r600: SMX returns CONTEXT_DONE early workaround</li>
-  <li>r600/shader: split address get out to a function.</li>
-  <li>r600/shader: add utility functions to do single slot arithmatic</li>
-  <li>r600g: fix geom shader input indirect indexing.</li>
-  <li>r600: handle geometry dynamic input array index</li>
-  <li>radeonsi: handle doubles in lds load path.</li>
-  <li>mesa/varray: set double arrays to non-normalised.</li>
-  <li>mesa/shader: return correct attribute location for double matrix arrays</li>
-</ul>
-
-<p>Emil Velikov (8):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.7</li>
-  <li>cherry-ignore: don't pick a specific i965 formats patch</li>
-  <li>Revert "i965/nir: Remove unused indirect handling"</li>
-  <li>Revert "i965/state: Get rid of dword_pitch arguments to buffer functions"</li>
-  <li>Revert "i965/vec4: Use a stride of 1 and byte offsets for UBOs"</li>
-  <li>Revert "i965/fs: Use a stride of 1 and byte offsets for UBOs"</li>
-  <li>Revert "i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge"</li>
-  <li>Update version to 11.0.8</li>
-</ul>
-
-<p>Francisco Jerez (1):</p>
-<ul>
-  <li>i965: Resolve color and flush for all active shader images in intel_update_state().</li>
-</ul>
-
-<p>Ian Romanick (1):</p>
-<ul>
-  <li>meta/generate_mipmap: Work-around GLES 1.x problem with GL_DRAW_FRAMEBUFFER</li>
-</ul>
-
-<p>Ilia Mirkin (17):</p>
-<ul>
-  <li>freedreno/a4xx: support lod_bias</li>
-  <li>freedreno/a4xx: fix 5_5_5_1 texture sampler format</li>
-  <li>freedreno/a4xx: point regid to "red" even for alpha-only rb formats</li>
-  <li>nvc0/ir: fold postfactor into immediate</li>
-  <li>nv50/ir: deal with loops with no breaks</li>
-  <li>nv50/ir: the mad source might not have a defining instruction</li>
-  <li>nv50/ir: fix instruction permutation logic</li>
-  <li>nv50/ir: don't forget to mark flagsDef on cvt in txb lowering</li>
-  <li>nv50/ir: fix DCE to not generate 96-bit loads</li>
-  <li>nv50/ir: avoid looking at uninitialized srcMods entries</li>
-  <li>gk110/ir: fix imul hi emission with limm arg</li>
-  <li>gk104/ir: sampler doesn't matter for txf</li>
-  <li>gk110/ir: fix imad sat/hi flag emission for immediate args</li>
-  <li>nv50/ir: fix cutoff for using r63 vs r127 when replacing zero</li>
-  <li>nv50/ir: can't have predication and immediates</li>
-  <li>glsl: assign varying locations to tess shaders when doing SSO</li>
-  <li>ttn: add TEX2 support</li>
-</ul>
-
-<p>Jason Ekstrand (5):</p>
-<ul>
-  <li>i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge</li>
-  <li>i965/fs: Use a stride of 1 and byte offsets for UBOs</li>
-  <li>i965/vec4: Use a stride of 1 and byte offsets for UBOs</li>
-  <li>i965/state: Get rid of dword_pitch arguments to buffer functions</li>
-  <li>i965/nir: Remove unused indirect handling</li>
-</ul>
-
-<p>Jonathan Gray (2):</p>
-<ul>
-  <li>configure.ac: use pkg-config for libelf</li>
-  <li>configure: check for python2.7 for PYTHON2</li>
-</ul>
-
-<p>Kenneth Graunke (2):</p>
-<ul>
-  <li>i965: Fix fragment shader struct inputs.</li>
-  <li>i965: Fix scalar vertex shader struct outputs.</li>
-</ul>
-
-<p>Marek Olšák (8):</p>
-<ul>
-  <li>radeonsi: fix occlusion queries on Fiji</li>
-  <li>radeonsi: fix a hang due to uninitialized border color registers</li>
-  <li>radeonsi: fix Fiji for LLVM &lt;= 3.7</li>
-  <li>radeonsi: don't call of u_prims_for_vertices for patches and rectangles</li>
-  <li>radeonsi: apply the streamout workaround to Fiji as well</li>
-  <li>gallium/radeon: fix Hyper-Z hangs by programming PA_SC_MODE_CNTL_1 correctly</li>
-  <li>tgsi/scan: add flag colors_written</li>
-  <li>r600g: write all MRTs only if there is exactly one output (fixes a hang)</li>
-</ul>
-
-<p>Matt Turner (1):</p>
-<ul>
-  <li>glsl: Allow binding of image variables with 420pack.</li>
-</ul>
-
-<p>Neil Roberts (2):</p>
-<ul>
-  <li>i965: Add MESA_FORMAT_B8G8R8X8_SRGB to brw_format_for_mesa_format</li>
-  <li>i965: Add B8G8R8X8_SRGB to the alpha format override</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>configura.ac: fix test for SSE4.1 assembler support</li>
-</ul>
-
-<p>Patrick Rudolph (2):</p>
-<ul>
-  <li>nv50,nvc0: fix use-after-free when vertex buffers are unbound</li>
-  <li>gallium/util: return correct number of bound vertex buffers</li>
-</ul>
-
-<p>Samuel Pitoiset (1):</p>
-<ul>
-  <li>nvc0: free memory allocated by the prog which reads MP perf counters</li>
-</ul>
-
-<p>Tapani Pälli (1):</p>
-<ul>
-  <li>i965: use _Shader to get fragment program when updating surface state</li>
-</ul>
-
-<p>Tom Stellard (2):</p>
-<ul>
-  <li>radeonsi: Rename si_shader::ls_rsrc{1,2} to si_shader::rsrc{1,2}</li>
-  <li>radeonsi/compute: Use the compiler's COMPUTE_PGM_RSRC* register values</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.0.9.html
+++ b/docs/relnotes/11.0.9.html
@@ -1,127 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.0.9 Release Notes / January 22, 2016</h1>
-
-<p>
-Mesa 11.0.9 is a bug fix release which fixes bugs found since the 11.0.8 release.
-</p>
-<p>
-Mesa 11.0.9 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-1597c2e983f476f98efdd6cd58b5298896d18479ff542bdeff28b98b129ede05  mesa-11.0.9.tar.gz
-a1262ff1c66a16ccf341186cf0e57b306b8589eb2cc5ce92ffb6788ab01d2b01  mesa-11.0.9.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Emil Velikov (6):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.8</li>
-  <li>cherry-ignore: add patch already in branch</li>
-  <li>cherry-ignore: add the dri3 glx null check patch</li>
-  <li>i915: correctly parse/set the context flags</li>
-  <li>egl/dri2: expose srgb configs when KHR_gl_colorspace is available</li>
-  <li>Update version to 11.0.9</li>
-</ul>
-
-<p>Grazvydas Ignotas (1):</p>
-<ul>
-  <li>r600: fix constant buffer size programming</li>
-</ul>
-
-<p>Ilia Mirkin (5):</p>
-<ul>
-  <li>nvc0: don't forget to reset VTX_TMP bufctx slot after blit completion</li>
-  <li>nv50/ir: float(s32 &amp; 0xff) = float(u8), not s8</li>
-  <li>nv50,nvc0: make sure there's pushbuf space and that we ref the bo early</li>
-  <li>nv50,nvc0: fix crash when increasing bsp bo size for h264</li>
-  <li>nvc0: scale up inter_bo size so that it's 16M for a 4K video</li>
-</ul>
-
-<p>Kenneth Graunke (2):</p>
-<ul>
-  <li>ralloc: Fix ralloc_adopt() to the old context's last child's parent.</li>
-  <li>nvc0: Set winding order regardless of domain.</li>
-</ul>
-
-<p>Marek Olšák (1):</p>
-<ul>
-  <li>radeonsi: don't miss changes to SPI_TMPRING_SIZE</li>
-</ul>
-
-<p>Miklós Máté (1):</p>
-<ul>
-  <li>mesa: Don't leak ATIfs instructions in DeleteFragmentShader</li>
-</ul>
-
-<p>Neil Roberts (1):</p>
-<ul>
-  <li>i965: Fix crash when calling glViewport with no surface bound</li>
-</ul>
-
-<p>Nicolai Hähnle (6):</p>
-<ul>
-  <li>gallium/radeon: only dispose locally created target machine in radeon_llvm_compile</li>
-  <li>mesa/bufferobj: make _mesa_delete_buffer_object externally accessible</li>
-  <li>st/mesa: use _mesa_delete_buffer_object</li>
-  <li>radeon: use _mesa_delete_buffer_object</li>
-  <li>i915: use _mesa_delete_buffer_object</li>
-  <li>i965: use _mesa_delete_buffer_object</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>llvmpipe: use vpkswss when dst is signed</li>
-</ul>
-
-<p>Rob Herring (1):</p>
-<ul>
-  <li>freedreno/ir3: fix 32-bit builds with pointer-to-int-cast error enabled</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -33,8 +33,7 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-e3bc44be4df5e4dc728dfda7b55b1aaeadfce36eca6a367b76cc07598070cb2d  mesa-11.1.0.tar.gz
-9befe03b04223eb1ede177fa8cac001e2850292c8c12a3ec9929106afad9cf1f  mesa-11.1.0.tar.xz
+TBD.
 </pre>


--- a/docs/relnotes/11.1.1.html
+++ b/docs/relnotes/11.1.1.html
@@ -1,197 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.1.1 Release Notes / January 13, 2016</h1>
-
-<p>
-Mesa 11.1.1 is a bug fix release which fixes bugs found since the 11.1.0 release.
-</p>
-<p>
-Mesa 11.1.1 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-b15089817540ba0bffd0aad323ecf3a8ff6779568451827c7274890b4a269d58  mesa-11.1.1.tar.gz
-64db074fc514136b5fb3890111f0d50604db52f0b1e94ba3fcb0fe8668a7fd20  mesa-11.1.1.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92233">Bug 92233</a> - Unigine Heaven 4.0 silhuette run</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Brian Paul (1):</p>
-<ul>
-  <li>st/mesa: check state-&gt;mesa in early return check in st_validate_state()</li>
-</ul>
-
-<p>Dave Airlie (6):</p>
-<ul>
-  <li>mesa/varray: set double arrays to non-normalised.</li>
-  <li>mesa/shader: return correct attribute location for double matrix arrays</li>
-  <li>glsl: pass stage into mark function</li>
-  <li>glsl/fp64: add helper for dual slot double detection.</li>
-  <li>glsl: fix count_attribute_slots to allow for different 64-bit handling</li>
-  <li>glsl: only update doubles inputs for vertex inputs.</li>
-</ul>
-
-<p>Emil Velikov (4):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.1</li>
-  <li>cherry-ignore: drop the "re-enable" DCC on Stoney</li>
-  <li>cherry-ignore: don't pick a specific i965 formats patch</li>
-  <li>Update version to 11.1.1</li>
-</ul>
-
-<p>Eric Anholt (2):</p>
-<ul>
-  <li>vc4: Warn instead of abort()ing on exec ioctl failures.</li>
-  <li>vc4: Keep sample mask writes from being reordered after TLB writes</li>
-</ul>
-
-<p>Grazvydas Ignotas (1):</p>
-<ul>
-  <li>r600: fix constant buffer size programming</li>
-</ul>
-
-<p>Ian Romanick (1):</p>
-<ul>
-  <li>meta/generate_mipmap: Work-around GLES 1.x problem with GL_DRAW_FRAMEBUFFER</li>
-</ul>
-
-<p>Ilia Mirkin (9):</p>
-<ul>
-  <li>nv50/ir: can't have predication and immediates</li>
-  <li>gk104/ir: simplify and fool-proof texbar algorithm</li>
-  <li>glsl: assign varying locations to tess shaders when doing SSO</li>
-  <li>glx/dri3: a drawable might not be bound at wait time</li>
-  <li>nvc0: don't forget to reset VTX_TMP bufctx slot after blit completion</li>
-  <li>nv50/ir: float(s32 &amp; 0xff) = float(u8), not s8</li>
-  <li>nv50,nvc0: make sure there's pushbuf space and that we ref the bo early</li>
-  <li>nv50,nvc0: fix crash when increasing bsp bo size for h264</li>
-  <li>nvc0: scale up inter_bo size so that it's 16M for a 4K video</li>
-</ul>
-
-<p>Jonathan Gray (2):</p>
-<ul>
-  <li>configure.ac: use pkg-config for libelf</li>
-  <li>configure: check for python2.7 for PYTHON2</li>
-</ul>
-
-<p>Kenneth Graunke (5):</p>
-<ul>
-  <li>ralloc: Fix ralloc_adopt() to the old context's last child's parent.</li>
-  <li>drirc: Disable ARB_blend_func_extended for Heaven 4.0/Valley 1.0.</li>
-  <li>glsl: Fix varying struct locations when varying packing is disabled.</li>
-  <li>nvc0: Set winding order regardless of domain.</li>
-  <li>nir: Add a lower_fdiv option, turn fdiv into fmul/frcp.</li>
-</ul>
-
-<p>Marek Olšák (7):</p>
-<ul>
-  <li>tgsi/scan: add flag colors_written</li>
-  <li>r600g: write all MRTs only if there is exactly one output (fixes a hang)</li>
-  <li>radeonsi: don't call of u_prims_for_vertices for patches and rectangles</li>
-  <li>radeonsi: apply the streamout workaround to Fiji as well</li>
-  <li>gallium/radeon: fix Hyper-Z hangs by programming PA_SC_MODE_CNTL_1 correctly</li>
-  <li>program: add _mesa_reserve_parameter_storage</li>
-  <li>st/mesa: fix GLSL uniform updates for glBitmap &amp; glDrawPixels (v2)</li>
-</ul>
-
-<p>Mark Janes (1):</p>
-<ul>
-  <li>Add missing platform information for KBL</li>
-</ul>
-
-<p>Miklós Máté (1):</p>
-<ul>
-  <li>mesa: Don't leak ATIfs instructions in DeleteFragmentShader</li>
-</ul>
-
-<p>Neil Roberts (3):</p>
-<ul>
-  <li>i965: Add MESA_FORMAT_B8G8R8X8_SRGB to brw_format_for_mesa_format</li>
-  <li>i965: Add B8G8R8X8_SRGB to the alpha format override</li>
-  <li>i965: Fix crash when calling glViewport with no surface bound</li>
-</ul>
-
-<p>Nicolai Hähnle (2):</p>
-<ul>
-  <li>gallium/radeon: only dispose locally created target machine in radeon_llvm_compile</li>
-  <li>gallium/radeon: fix regression in a number of driver queries</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>configura.ac: fix test for SSE4.1 assembler support</li>
-</ul>
-
-<p>Patrick Rudolph (2):</p>
-<ul>
-  <li>nv50,nvc0: fix use-after-free when vertex buffers are unbound</li>
-  <li>gallium/util: return correct number of bound vertex buffers</li>
-</ul>
-
-<p>Rob Herring (1):</p>
-<ul>
-  <li>freedreno/ir3: fix 32-bit builds with pointer-to-int-cast error enabled</li>
-</ul>
-
-<p>Samuel Pitoiset (3):</p>
-<ul>
-  <li>nvc0: free memory allocated by the prog which reads MP perf counters</li>
-  <li>nv50,nvc0: free memory allocated by performance metrics</li>
-  <li>nv50: free memory allocated by the prog which reads MP perf counters</li>
-</ul>
-
-<p>Sarah Sharp (1):</p>
-<ul>
-  <li>mesa: Add KBL PCI IDs and platform information.</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.1.2.html
+++ b/docs/relnotes/11.1.2.html
@@ -1,182 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.1.2 Release Notes / February 10, 2016</h1>
-
-<p>
-Mesa 11.1.2 is a bug fix release which fixes bugs found since the 11.1.1 release.
-</p>
-<p>
-Mesa 11.1.2 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-ba0e7462b2936b86e6684c26fbb55519f8d9ad31d13a1c1e1afbe41e73466eea  mesa-11.1.2.tar.gz
-8f72aead896b340ba0f7a4a474bfaf71681f5d675592aec1cb7ba698e319148b  mesa-11.1.2.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93628">Bug 93628</a> - Exception: attempt to use unavailable module DRM when building MesaGL 11.1.0 on windows</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93648">Bug 93648</a> - Random lines being rendered when playing Dolphin (geometry shaders related, w/ apitrace)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93650">Bug 93650</a> - GL_ARB_separate_shader_objects is buggy (PCSX2)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93717">Bug 93717</a> - Meta mipmap generation can corrupt texture state</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93722">Bug 93722</a> - Segfault when compiling shader with a subroutine that takes a parameter</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93731">Bug 93731</a> - glUniformSubroutinesuiv segfaults when subroutine uniform is bound to a specific location</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93761">Bug 93761</a> - A conditional discard in a fragment shader causes no depth writing at all</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Ben Widawsky (1):</p>
-<ul>
-  <li>i965/bxt: Fix conservative wm thread counts.</li>
-</ul>
-
-<p>Dave Airlie (1):</p>
-<ul>
-  <li>glsl: fix subroutine lowering reusing actual parmaters</li>
-</ul>
-
-<p>Emil Velikov (6):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.1.1</li>
-  <li>cherry-ignore: drop the i965/kbl .num_slices patch</li>
-  <li>i915: correctly parse/set the context flags</li>
-  <li>targets/dri: android: use WHOLE static libraries</li>
-  <li>egl/dri2: expose srgb configs when KHR_gl_colorspace is available</li>
-  <li>Update version to 11.1.2</li>
-</ul>
-
-<p>Eric Anholt (2):</p>
-<ul>
-  <li>vc4: Don't record the seqno of a failed job submit.</li>
-  <li>vc4: Throttle outstanding rendering after submission.</li>
-</ul>
-
-<p>François Tigeot (1):</p>
-<ul>
-  <li>gallium: Add DragonFly support</li>
-</ul>
-
-<p>Grazvydas Ignotas (1):</p>
-<ul>
-  <li>r600g: don't leak driver const buffers</li>
-</ul>
-
-<p>Ian Romanick (2):</p>
-<ul>
-  <li>meta/blit: Restore GL_DEPTH_STENCIL_TEXTURE_MODE state for GL_TEXTURE_RECTANGLE</li>
-  <li>meta: Use internal functions to set texture parameters</li>
-</ul>
-
-<p>Ilia Mirkin (6):</p>
-<ul>
-  <li>st/mesa: use surface format to generate mipmaps when available</li>
-  <li>glsl: always compute proper varying type, irrespective of varying packing</li>
-  <li>nvc0: avoid crashing when there are holes in vertex array bindings</li>
-  <li>nv50,nvc0: fix buffer clearing to respect engine alignment requirements</li>
-  <li>nv50/ir: fix false global CSE on instructions with multiple defs</li>
-  <li>st/mesa: treat a write as a read for range purposes</li>
-</ul>
-
-<p>Jason Ekstrand (3):</p>
-<ul>
-  <li>i965/vec4: Use UW type for multiply into accumulator on GEN8+</li>
-  <li>i965/fs/generator: Take an actual shader stage rather than a string</li>
-  <li>i965/fs: Always set channel 2 of texture headers in some stages</li>
-</ul>
-
-<p>Jose Fonseca (2):</p>
-<ul>
-  <li>scons: Conditionally use DRM module on pipe-loader.</li>
-  <li>pipe-loader: Fix PATH_MAX define on MSVC.</li>
-</ul>
-
-<p>Karol Herbst (1):</p>
-<ul>
-  <li>nv50/ir: fix memory corruption when spilling and redoing RA</li>
-</ul>
-
-<p>Kenneth Graunke (2):</p>
-<ul>
-  <li>glsl: Make bitfield_insert/extract and bfi/bfm non-vectorizable.</li>
-  <li>glsl: Allow implicit int -&gt; uint conversions for bitwise operators (&amp;, ^, |).</li>
-</ul>
-
-<p>Leo Liu (2):</p>
-<ul>
-  <li>vl: add zig zag scan for list 4x4</li>
-  <li>st/omx/dec/h264: fix corruption when scaling matrix present flag set</li>
-</ul>
-
-<p>Marek Olšák (1):</p>
-<ul>
-  <li>radeonsi: don't miss changes to SPI_TMPRING_SIZE</li>
-</ul>
-
-<p>Nicolai Hähnle (11):</p>
-<ul>
-  <li>mesa/bufferobj: make _mesa_delete_buffer_object externally accessible</li>
-  <li>st/mesa: use _mesa_delete_buffer_object</li>
-  <li>radeon: use _mesa_delete_buffer_object</li>
-  <li>i915: use _mesa_delete_buffer_object</li>
-  <li>i965: use _mesa_delete_buffer_object</li>
-  <li>util/u_pstipple.c: copy immediates during transformation</li>
-  <li>radeonsi: extract the VGT_GS_MODE calculation into its own function</li>
-  <li>radeonsi: ensure that VGT_GS_MODE is sent when necessary</li>
-  <li>radeonsi: add DCC buffer for sampler views on new CS</li>
-  <li>st/mesa: use the correct address generation functions in st_TexSubImage blit</li>
-  <li>radeonsi: fix discard-only fragment shaders (11.1 version)</li>
-</ul>
-
-<p>Timothy Arceri (4):</p>
-<ul>
-  <li>glsl: fix segfault linking subroutine uniform with explicit location</li>
-  <li>mesa: fix segfault in glUniformSubroutinesuiv()</li>
-  <li>glsl: fix interface block error message</li>
-  <li>glsl: create helper to remove outer vertex index array used by some stages</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.2.0.html
+++ b/docs/relnotes/11.2.0.html
@@ -1,85 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.2.0 Release Notes / TBD</h1>
-
-<p>
-Mesa 11.2.0 is a new development release.
-People who are concerned with stability and reliability should stick
-with a previous release or wait for Mesa 11.2.1.
-</p>
-<p>
-Mesa 11.2.0 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-TBD.
-</pre>
-
-
-<h2>New features</h2>
-
-<p>
-Note: some of the new features are only available with certain drivers.
-</p>
-
-<ul>
-<li>GL_ARB_arrays_of_arrays on all gallium drivers that provide GLSL 1.30</li>
-<li>GL_ARB_base_instance on freedreno/a4xx</li>
-<li>GL_ARB_compute_shader on i965</li>
-<li>GL_ARB_copy_image on r600</li>
-<li>GL_ARB_indirect_parameters on nvc0</li>
-<li>GL_ARB_query_buffer_object on nvc0</li>
-<li>GL_ARB_shader_atomic_counters on nvc0</li>
-<li>GL_ARB_shader_draw_parameters on i965, nvc0</li>
-<li>GL_ARB_shader_storage_buffer_object on nvc0</li>
-<li>GL_ARB_tessellation_shader on i965 and r600 (evergreen/cayman only)</li>
-<li>GL_ARB_texture_buffer_object_rgb32 on freedreno/a4xx</li>
-<li>GL_ARB_texture_buffer_range on freedreno/a4xx</li>
-<li>GL_ARB_texture_query_lod on freedreno/a4xx</li>
-<li>GL_ARB_texture_rgb10_a2ui on freedreno/a4xx</li>
-<li>GL_ARB_texture_view on freedreno/a4xx</li>
-<li>GL_ARB_vertex_type_10f_11f_11f_rev on freedreno/a4xx</li>
-<li>GL_KHR_texture_compression_astc_ldr on freedreno/a4xx</li>
-<li>GL_AMD_performance_monitor on radeonsi (CIK+ only)</li>
-<li>GL_ATI_meminfo on r600, radeonsi</li>
-<li>GL_NVX_gpu_memory_info on r600, radeonsi</li>
-<li>New OSMesaCreateContextAttribs() function (for creating core profile
-    contexts)</li>
-</ul>
-
-<h2>Bug fixes</h2>
-
-TBD.
-
-<h2>Changes</h2>
-
-Microsoft Visual Studio 2013 or later is now required for building
-on Windows.
-Previously, Visual Studio 2008 and later were supported.
-
-TBD.
-
-</div>
-</body>
-</html>
--- a/docs/thanks.html
+++ b/docs/thanks.html
@@ -42,7 +42,9 @@ Tungsten Graphics, Inc. have supported the ongoing development of Mesa.
 <li>The
 <a href="http://www.mesa3d.org">Mesa</a>
 website is hosted by
-<a href="http://sourceforge.net">sourceforge.net</a>.
+<a href="http://sourceforge.net">
+<img src="http://sourceforge.net/sflogo.php?group_id=3&amp;type=1"
+width="88" height="31" align="bottom" alt="Sourceforge.net" border="0"></a>
 <br>
 <br>

--- a/include/D3D9/d3d9types.h
+++ b/include/D3D9/d3d9types.h
@@ -227,7 +227,6 @@ typedef struct _RGNDATA {
 #define D3DERR_DRIVERINVALIDCALL         MAKE_D3DHRESULT(2157)
 #define D3DERR_DEVICEREMOVED             MAKE_D3DHRESULT(2160)
 #define D3DERR_DEVICEHUNG                MAKE_D3DHRESULT(2164)
-#define S_PRESENT_OCCLUDED               MAKE_D3DSTATUS(2168)

 /********************************************************
 * Bitmasks                                             *
--- a/include/GL/osmesa.h
+++ b/include/GL/osmesa.h
@@ -58,8 +58,8 @@ extern "C" {
 #include <GL/gl.h>


-#define OSMESA_MAJOR_VERSION 11
-#define OSMESA_MINOR_VERSION 2
+#define OSMESA_MAJOR_VERSION 10
+#define OSMESA_MINOR_VERSION 0
 #define OSMESA_PATCH_VERSION 0


@@ -95,18 +95,6 @@ extern "C" {
 #define OSMESA_MAX_WIDTH	0x24  /* new in 4.0 */
 #define OSMESA_MAX_HEIGHT	0x25  /* new in 4.0 */

-/*
- * Accepted in OSMesaCreateContextAttrib's attribute list.
- */
-#define OSMESA_DEPTH_BITS            0x30
-#define OSMESA_STENCIL_BITS          0x31
-#define OSMESA_ACCUM_BITS            0x32
-#define OSMESA_PROFILE               0x33
-#define OSMESA_CORE_PROFILE          0x34
-#define OSMESA_COMPAT_PROFILE        0x35
-#define OSMESA_CONTEXT_MAJOR_VERSION 0x36
-#define OSMESA_CONTEXT_MINOR_VERSION 0x37
-

 typedef struct osmesa_context *OSMesaContext;

@@ -139,35 +127,6 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits,
                        GLint accumBits, OSMesaContext sharelist);


-/*
- * Create an Off-Screen Mesa rendering context with attribute list.
- * The list is composed of (attribute, value) pairs and terminated with
- * attribute==0.  Supported Attributes:
- *
- * Attributes                    Values
- * --------------------------------------------------------------------------
- * OSMESA_FORMAT                 OSMESA_RGBA*, OSMESA_BGRA, OSMESA_ARGB, etc.
- * OSMESA_DEPTH_BITS             0*, 16, 24, 32
- * OSMESA_STENCIL_BITS           0*, 8
- * OSMESA_ACCUM_BITS             0*, 16
- * OSMESA_PROFILE                OSMESA_COMPAT_PROFILE*, OSMESA_CORE_PROFILE
- * OSMESA_CONTEXT_MAJOR_VERSION  1*, 2, 3
- * OSMESA_CONTEXT_MINOR_VERSION  0+
- *
- * Note: * = default value
- *
- * We return a context version >= what's specified by OSMESA_CONTEXT_MAJOR/
- * MINOR_VERSION for the given profile.  For example, if you request a GL 1.4
- * compat profile, you might get a GL 3.0 compat profile.
- * Otherwise, null is returned if the version/profile is not supported.
- *
- * New in Mesa 11.2
- */
-GLAPI OSMesaContext GLAPIENTRY
-OSMesaCreateContextAttribs( const int *attribList, OSMesaContext sharelist );
-
-
-
 /*
 * Destroy an Off-Screen Mesa rendering context.
 *
--- a/include/c99/inttypes.h
+++ b/include/c99/inttypes.h
@@ -0,0 +1,305 @@
+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "stdint.h"
+
+// 7.8 Format conversion of integer types
+
+typedef struct {     // quotient/remainder pair returned by imaxdiv()
+   intmax_t quot;    // quotient of numer / denom
+   intmax_t rem;     // remainder of numer / denom
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198
+
+// The fprintf macros for signed integers are:
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16       "hd"
+#define PRIi16       "hi"
+#define PRIdLEAST16  "hd"
+#define PRIiLEAST16  "hi"
+#define PRIdFAST16   "hd"
+#define PRIiFAST16   "hi"
+
+#define PRId32       "I32d"
+#define PRIi32       "I32i"
+#define PRIdLEAST32  "I32d"
+#define PRIiLEAST32  "I32i"
+#define PRIdFAST32   "I32d"
+#define PRIiFAST32   "I32i"
+
+#define PRId64       "I64d"
+#define PRIi64       "I64i"
+#define PRIdLEAST64  "I64d"
+#define PRIiLEAST64  "I64i"
+#define PRIdFAST64   "I64d"
+#define PRIiFAST64   "I64i"
+
+#define PRIdMAX     "I64d"
+#define PRIiMAX     "I64i"
+
+#define PRIdPTR     "Id"
+#define PRIiPTR     "Ii"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16       "ho"
+#define PRIu16       "hu"
+#define PRIx16       "hx"
+#define PRIX16       "hX"
+#define PRIoLEAST16  "ho"
+#define PRIuLEAST16  "hu"
+#define PRIxLEAST16  "hx"
+#define PRIXLEAST16  "hX"
+#define PRIoFAST16   "ho"
+#define PRIuFAST16   "hu"
+#define PRIxFAST16   "hx"
+#define PRIXFAST16   "hX"
+
+#define PRIo32       "I32o"
+#define PRIu32       "I32u"
+#define PRIx32       "I32x"
+#define PRIX32       "I32X"
+#define PRIoLEAST32  "I32o"
+#define PRIuLEAST32  "I32u"
+#define PRIxLEAST32  "I32x"
+#define PRIXLEAST32  "I32X"
+#define PRIoFAST32   "I32o"
+#define PRIuFAST32   "I32u"
+#define PRIxFAST32   "I32x"
+#define PRIXFAST32   "I32X"
+
+#define PRIo64       "I64o"
+#define PRIu64       "I64u"
+#define PRIx64       "I64x"
+#define PRIX64       "I64X"
+#define PRIoLEAST64  "I64o"
+#define PRIuLEAST64  "I64u"
+#define PRIxLEAST64  "I64x"
+#define PRIXLEAST64  "I64X"
+#define PRIoFAST64   "I64o"
+#define PRIuFAST64   "I64u"
+#define PRIxFAST64   "I64x"
+#define PRIXFAST64   "I64X"
+
+#define PRIoMAX     "I64o"
+#define PRIuMAX     "I64u"
+#define PRIxMAX     "I64x"
+#define PRIXMAX     "I64X"
+
+#define PRIoPTR     "Io"
+#define PRIuPTR     "Iu"
+#define PRIxPTR     "Ix"
+#define PRIXPTR     "IX"
+
+// The fscanf macros for signed integers are:
+#define SCNd8       "d"
+#define SCNi8       "i"
+#define SCNdLEAST8  "d"
+#define SCNiLEAST8  "i"
+#define SCNdFAST8   "d"
+#define SCNiFAST8   "i"
+
+#define SCNd16       "hd"
+#define SCNi16       "hi"
+#define SCNdLEAST16  "hd"
+#define SCNiLEAST16  "hi"
+#define SCNdFAST16   "hd"
+#define SCNiFAST16   "hi"
+
+#define SCNd32       "ld"
+#define SCNi32       "li"
+#define SCNdLEAST32  "ld"
+#define SCNiLEAST32  "li"
+#define SCNdFAST32   "ld"
+#define SCNiFAST32   "li"
+
+#define SCNd64       "I64d"
+#define SCNi64       "I64i"
+#define SCNdLEAST64  "I64d"
+#define SCNiLEAST64  "I64i"
+#define SCNdFAST64   "I64d"
+#define SCNiFAST64   "I64i"
+
+#define SCNdMAX     "I64d"
+#define SCNiMAX     "I64i"
+
+#ifdef _WIN64 // [
+#  define SCNdPTR     "I64d"
+#  define SCNiPTR     "I64i"
+#else  // _WIN64 ][
+#  define SCNdPTR     "ld"
+#  define SCNiPTR     "li"
+#endif  // _WIN64 ]
+
+// The fscanf macros for unsigned integers are:
+#define SCNo8       "o"
+#define SCNu8       "u"
+#define SCNx8       "x"
+#define SCNX8       "X"
+#define SCNoLEAST8  "o"
+#define SCNuLEAST8  "u"
+#define SCNxLEAST8  "x"
+#define SCNXLEAST8  "X"
+#define SCNoFAST8   "o"
+#define SCNuFAST8   "u"
+#define SCNxFAST8   "x"
+#define SCNXFAST8   "X"
+
+#define SCNo16       "ho"
+#define SCNu16       "hu"
+#define SCNx16       "hx"
+#define SCNX16       "hX"
+#define SCNoLEAST16  "ho"
+#define SCNuLEAST16  "hu"
+#define SCNxLEAST16  "hx"
+#define SCNXLEAST16  "hX"
+#define SCNoFAST16   "ho"
+#define SCNuFAST16   "hu"
+#define SCNxFAST16   "hx"
+#define SCNXFAST16   "hX"
+
+#define SCNo32       "lo"
+#define SCNu32       "lu"
+#define SCNx32       "lx"
+#define SCNX32       "lX"
+#define SCNoLEAST32  "lo"
+#define SCNuLEAST32  "lu"
+#define SCNxLEAST32  "lx"
+#define SCNXLEAST32  "lX"
+#define SCNoFAST32   "lo"
+#define SCNuFAST32   "lu"
+#define SCNxFAST32   "lx"
+#define SCNXFAST32   "lX"
+
+#define SCNo64       "I64o"
+#define SCNu64       "I64u"
+#define SCNx64       "I64x"
+#define SCNX64       "I64X"
+#define SCNoLEAST64  "I64o"
+#define SCNuLEAST64  "I64u"
+#define SCNxLEAST64  "I64x"
+#define SCNXLEAST64  "I64X"
+#define SCNoFAST64   "I64o"
+#define SCNuFAST64   "I64u"
+#define SCNxFAST64   "I64x"
+#define SCNXFAST64   "I64X"
+
+#define SCNoMAX     "I64o"
+#define SCNuMAX     "I64u"
+#define SCNxMAX     "I64x"
+#define SCNXMAX     "I64X"
+
+#ifdef _WIN64 // [
+#  define SCNoPTR     "I64o"
+#  define SCNuPTR     "I64u"
+#  define SCNxPTR     "I64x"
+#  define SCNXPTR     "I64X"
+#else  // _WIN64 ][
+#  define SCNoPTR     "lo"
+#  define SCNuPTR     "lu"
+#  define SCNxPTR     "lx"
+#  define SCNXPTR     "lX"
+#endif  // _WIN64 ]
+
+#endif // __STDC_FORMAT_MACROS ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+
+// Returns numer / denom and numer % denom together in an imaxdiv_t.  Modified
+// version of div() from Microsoft's div.c found in %MSVC.NET%\crt\src\div.c
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+   imaxdiv_t result;
+
+   result.quot = numer / denom; // undefined behaviour if denom == 0
+   result.rem = numer % denom;
+
+   if (numer < 0 && result.rem > 0) {
+      // quotient was not truncated toward zero (rem has wrong sign); must fix up
+      ++result.quot;
+      result.rem -= denom;
+   }
+
+   return result;
+}
+
+// 7.8.2.3 The strtoimax and strtoumax functions
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+
+#endif // _MSC_INTTYPES_H_ ]
--- a/src/mesa/state_tracker/st_cb_compute.h
+++ b/src/mesa/state_tracker/st_cb_compute.h
@@ -1,6 +1,6 @@
 /**************************************************************************
 *
- * Copyright 2016 Samuel Pitoiset
+ * Copyright 2007-2010 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -11,28 +11,36 @@
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
 **************************************************************************/

-#ifndef ST_CB_COMPUTE_H
-#define ST_CB_COMPUTE_H
+#ifndef _STDBOOL_H_
+#define _STDBOOL_H_

-#include "main/compiler.h"
+#ifndef __cplusplus

-struct dd_function_table;
+#define false   0
+#define true    1
+#define bool    _Bool

-extern void
-st_init_compute_functions(struct dd_function_table *functions);
+/* For compilers that don't have the builtin _Bool type. */
+#if (defined(_MSC_VER) && _MSC_VER < 1800)
+typedef unsigned char _Bool;
+#endif

-#endif /* ST_CB_COMPUTE_H */
+#endif /* !__cplusplus */
+
+#define __bool_true_false_are_defined   1
+
+#endif /* !_STDBOOL_H_ */
--- a/include/c99/stdint.h
+++ b/include/c99/stdint.h
@@ -0,0 +1,247 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2008 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or the compiler gives many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 don't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+   typedef signed char       int8_t;
+   typedef signed short      int16_t;
+   typedef signed int        int32_t;
+   typedef unsigned char     uint8_t;
+   typedef unsigned short    uint16_t;
+   typedef unsigned int      uint32_t;
+#else
+   typedef signed __int8     int8_t;
+   typedef signed __int16    int16_t;
+   typedef signed __int32    int32_t;
+   typedef unsigned __int8   uint8_t;
+   typedef unsigned __int16  uint16_t;
+   typedef unsigned __int32  uint32_t;
+#endif
+typedef signed __int64       int64_t;
+typedef unsigned __int64     uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef signed __int64    intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef _W64 signed int   intptr_t;
+   typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -36,17 +36,17 @@
 */
 #if defined(_MSC_VER)

-#  if _MSC_VER < 1800
-#    error "Microsoft Visual Studio 2013 or higher required"
+#  if _MSC_VER < 1500
+#    error "Microsoft Visual Studio 2008 or higher required"
 #  endif

   /*
-    * Visual Studio will complain if we define the `inline` keyword, but
+    * Visual Studio 2012 will complain if we define the `inline` keyword, but
    * actually it only supports the keyword on C++.
    *
    * To avoid this the _ALLOW_KEYWORD_MACROS must be set.
    */
-#  if !defined(_ALLOW_KEYWORD_MACROS)
+#  if (_MSC_VER >= 1700) && !defined(_ALLOW_KEYWORD_MACROS)
 #    define _ALLOW_KEYWORD_MACROS
 #  endif

@@ -81,6 +81,8 @@
     /* Intel compiler supports inline keyword */
 #  elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100)
 #    define inline __inline
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+     /* C99 supports inline keyword */
 #  elif (__STDC_VERSION__ >= 199901L)
     /* C99 supports inline keyword */
 #  else
@@ -98,6 +100,8 @@
 #ifndef restrict
 #  if (__STDC_VERSION__ >= 199901L)
     /* C99 */
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+     /* C99 */
 #  elif defined(__GNUC__)
 #    define restrict __restrict__
 #  elif defined(_MSC_VER)
@@ -114,6 +118,8 @@
 #ifndef __func__
 #  if (__STDC_VERSION__ >= 199901L)
     /* C99 */
+#  elif defined(__SUNPRO_C) && defined(__C99FEATURES__)
+     /* C99 */
 #  elif defined(__GNUC__)
 #    define __func__ __FUNCTION__
 #  elif defined(_MSC_VER)
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -38,16 +38,55 @@
 #include "c99_compat.h"


+#if defined(_MSC_VER)
+
 /* This is to ensure that we get M_PI, etc. definitions */
-#if defined(_MSC_VER) && !defined(_USE_MATH_DEFINES)
+#if !defined(_USE_MATH_DEFINES)
 #error _USE_MATH_DEFINES define required when building with MSVC
 #endif 

+#if _MSC_VER < 1800
+#define isfinite(x) _finite((double)(x))
+#define isnan(x) _isnan((double)(x))
+#endif /* _MSC_VER < 1800 */

-#if !defined(_MSC_VER) && \
-    __STDC_VERSION__ < 199901L && \
-    (!defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600) && \
-    !defined(__cplusplus)
+#if _MSC_VER < 1800
+static inline double log2( double x ) /* base-2 logarithm, computed as log(x) / ln(2) */
+{
+   const double invln2 = 1.4426950408889634; /* 1/ln(2) to full double precision */
+   return log( x ) * invln2;
+}
+
+static inline double round(double x) /* round to nearest, halfway cases away from zero */
+{
+   const double t = x >= 0.0 ? floor(x) : ceil(x); /* truncate toward zero; x - t is exact */
+   return x - t >= 0.5 ? t + 1.0 : (t - x >= 0.5 ? t - 1.0 : t); /* avoids the rounding error of x + 0.5 */
+}
+
+static inline float roundf(float x) /* round to nearest, halfway cases away from zero */
+{
+   const float t = x >= 0.0f ? floorf(x) : ceilf(x); /* truncate toward zero; x - t is exact */
+   return x - t >= 0.5f ? t + 1.0f : (t - x >= 0.5f ? t - 1.0f : t); /* avoids the rounding error of x + 0.5f */
+}
+#endif
+
+#ifndef INFINITY
+#include <float.h> // DBL_MAX
+#define INFINITY (DBL_MAX + DBL_MAX)
+#endif
+
+#ifndef NAN
+#define NAN (INFINITY - INFINITY)
+#endif
+
+#endif /* _MSC_VER */
+
+
+#if (defined(_MSC_VER) && _MSC_VER < 1800) || \
+    (!defined(_MSC_VER) && \
+     __STDC_VERSION__ < 199901L && \
+     (!defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600) && \
+     !defined(__cplusplus))

 static inline long int
 lrint(double d)
--- a/include/d3dadapter/present.h
+++ b/include/d3dadapter/present.h
@@ -69,8 +69,6 @@ typedef struct ID3DPresentVtbl
    HRESULT (WINAPI *SetCursor)(ID3DPresent *This, void *pBitmap, POINT *pHotspot, BOOL bShow);
    HRESULT (WINAPI *SetGammaRamp)(ID3DPresent *This, const D3DGAMMARAMP *pRamp, HWND hWndOverride);
    HRESULT (WINAPI *GetWindowInfo)(ID3DPresent *This,  HWND hWnd, int *width, int *height, int *depth);
-    /* Available since version 1.1 */
-    BOOL (WINAPI *GetWindowOccluded)(ID3DPresent *This);
 } ID3DPresentVtbl;

 struct ID3DPresent
@@ -98,7 +96,6 @@ struct ID3DPresent
 #define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c)
 #define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b)
 #define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowSize(p,a,b,c,d)
-#define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p)

 typedef struct ID3DPresentGroupVtbl
 {
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -112,7 +112,6 @@ CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
-CHIPSET(0x190B, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
 CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
 CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
@@ -123,39 +122,16 @@ CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
 CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
 CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
 CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
-CHIPSET(0x1921, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
-CHIPSET(0x1923, skl_gt3, "Intel(R) Skylake GT3e")
-CHIPSET(0x1926, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
+CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
+CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
+CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
 CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
 CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
-CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics 555 (Skylake GT3e)")
-CHIPSET(0x192D, skl_gt3, "Intel(R) Iris Graphics P555 (Skylake GT3e)")
-CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
-CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
-CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
-CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
-CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
-CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
-CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
-CHIPSET(0x590B, kbl_gt1, "Intel(R) Kabylake GT1")
-CHIPSET(0x590E, kbl_gt1, "Intel(R) Kabylake GT1")
-CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
-CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
-CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
-CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
-CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
-CHIPSET(0x592A, kbl_gt3, "Intel(R) Kabylake GT3")
-CHIPSET(0x592B, kbl_gt3, "Intel(R) Kabylake GT3")
-CHIPSET(0x5932, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x593A, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x593D, kbl_gt4, "Intel(R) Kabylake GT4")
+CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
+CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x193D, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
--- a/include/pci_ids/virtio_gpu_pci_ids.h
+++ b/include/pci_ids/virtio_gpu_pci_ids.h
@@ -1 +0,0 @@
-CHIPSET(0x0010, VIRTGL, VIRTGL)
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -94,8 +94,16 @@ def msvc2013_compat(env):
            '-Werror=pointer-arith',
        ])

+def msvc2008_compat(env):
+    msvc2013_compat(env)
+    if env['gcc']:
+        env.Append(CFLAGS = [
+            '-Werror=declaration-after-statement',
+        ])
+
 def createMSVCCompatMethods(env):
    env.AddMethod(msvc2013_compat, 'MSVC2013Compat')
+    env.AddMethod(msvc2008_compat, 'MSVC2008Compat')


 def num_jobs():
@@ -292,7 +300,7 @@ def generate(env):

    # C preprocessor options
    cppdefines = []
-    cppdefines += ['__STDC_LIMIT_MACROS', '__STDC_CONSTANT_MACROS']
+    cppdefines += ['__STDC_LIMIT_MACROS']
    if env['build'] in ('debug', 'checked'):
        cppdefines += ['DEBUG']
    else:
@@ -471,12 +479,20 @@ def generate(env):
        # See also:
        # - http://msdn.microsoft.com/en-us/library/19z1t1wy.aspx
        # - cl /?
+        if 'MSVC_VERSION' not in env or distutils.version.LooseVersion(env['MSVC_VERSION']) < distutils.version.LooseVersion('12.0'):
+            # Use bundled stdbool.h and stdint.h headers for older MSVC
+            # versions.  stdint.h was introduced in MSVC 2010, but stdbool.h
+            # was only introduced in MSVC 2013.
+            top_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+            env.Append(CPPPATH = [os.path.join(top_dir, 'include/c99')])
        if env['build'] == 'debug':
            ccflags += [
              '/Od', # disable optimizations
              '/Oi', # enable intrinsic functions
            ]
        else:
+            if 'MSVC_VERSION' in env and distutils.version.LooseVersion(env['MSVC_VERSION']) < distutils.version.LooseVersion('11.0'):
+                print 'scons: warning: Visual Studio versions prior to 2012 are known to produce incorrect code when optimizations are enabled ( https://bugs.freedesktop.org/show_bug.cgi?id=58718 )'
            ccflags += [
                '/O2', # optimize for speed
            ]
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -106,19 +106,7 @@ def generate(env):
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
-        if llvm_version >= distutils.version.LooseVersion('3.7'):
-            env.Prepend(LIBS = [
-                'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
-                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
-                'LLVMCodeGen', 'LLVMScalarOpts', 'LLVMProfileData',
-                'LLVMInstCombine', 'LLVMInstrumentation', 'LLVMTransformUtils', 'LLVMipa',
-                'LLVMAnalysis', 'LLVMX86Desc', 'LLVMMCDisassembler',
-                'LLVMX86Info', 'LLVMX86AsmPrinter', 'LLVMX86Utils',
-                'LLVMMCJIT', 'LLVMTarget', 'LLVMExecutionEngine',
-                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
-                'LLVMBitReader', 'LLVMMC', 'LLVMCore', 'LLVMSupport'
-            ])
-        elif llvm_version >= distutils.version.LooseVersion('3.6'):
+        if llvm_version >= distutils.version.LooseVersion('3.6'):
            env.Prepend(LIBS = [
                'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,11 +21,8 @@

 SUBDIRS = . gtest util mapi/glapi/gen mapi

-# include only conditionally ?
-SUBDIRS += compiler
-
 if NEED_OPENGL_COMMON
-SUBDIRS += mesa
+SUBDIRS += glsl mesa
 endif

 SUBDIRS += loader
--- a/src/SConscript
+++ b/src/SConscript
@@ -5,7 +5,7 @@ if env['platform'] == 'windows':
    SConscript('getopt/SConscript')

 SConscript('util/SConscript')
-SConscript('compiler/SConscript')
+SConscript('glsl/SConscript')

 if env['hostonly']:
    # We are just compiling the things necessary on the host for cross
--- a/src/compiler/.gitignore
+++ b/src/compiler/.gitignore
@@ -1 +0,0 @@
-glsl_compiler
--- a/src/compiler/Android.mk
+++ b/src/compiler/Android.mk
@@ -1,67 +0,0 @@
-# Mesa 3-D graphics library
-#
-# Copyright (C) 2015 Intel Corporation
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-LOCAL_PATH := $(call my-dir)
-
-include $(LOCAL_PATH)/Makefile.sources
-
-# ---------------------------------------
-# Build libmesa_compiler
-# ---------------------------------------
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := $(LIBCOMPILER_FILES)
-
-LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src/mapi \
-	$(MESA_TOP)/src/mesa \
-	$(MESA_TOP)/src/gallium/include \
-	$(MESA_TOP)/src/gallium/auxiliary
-
-LOCAL_MODULE := libmesa_compiler
-
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
-
-# ---------------------------------------
-# Build libmesa_nir
-# ---------------------------------------
-
-include $(CLEAR_VARS)
-
-LOCAL_SRC_FILES := \
-	$(NIR_FILES)
-
-LOCAL_C_INCLUDES := \
-	$(MESA_TOP)/src/mapi \
-	$(MESA_TOP)/src/mesa \
-	$(MESA_TOP)/src/gallium/include \
-	$(MESA_TOP)/src/gallium/auxiliary
-
-LOCAL_STATIC_LIBRARIES := libmesa_compiler
-
-LOCAL_MODULE := libmesa_nir
-
-include $(LOCAL_PATH)/Android.gen.mk
-include $(MESA_COMMON_MK)
-include $(BUILD_STATIC_LIBRARY)
--- a/src/compiler/Makefile.am
+++ b/src/compiler/Makefile.am
@@ -1,325 +0,0 @@
-#
-# Copyright © 2012 Jon TURNEY
-# Copyright (C) 2015 Intel Corporation
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-include Makefile.sources
-
-AM_CPPFLAGS = \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/src \
-	-I$(top_srcdir)/src/mapi \
-	-I$(top_srcdir)/src/mesa/ \
-	-I$(top_builddir)/src/compiler/glsl\
-	-I$(top_srcdir)/src/compiler/glsl\
-	-I$(top_srcdir)/src/compiler/glsl/glcpp\
-	-I$(top_srcdir)/src/gallium/include \
-	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gtest/include \
-	$(DEFINES)
-
-AM_CFLAGS = \
-	$(VISIBILITY_CFLAGS) \
-	$(MSVC2013_COMPAT_CFLAGS)
-
-AM_CXXFLAGS = \
-	$(VISIBILITY_CXXFLAGS) \
-	$(MSVC2013_COMPAT_CXXFLAGS)
-
-noinst_LTLIBRARIES = libcompiler.la
-
-libcompiler_la_SOURCES = $(LIBCOMPILER_FILES)
-
-check_PROGRAMS =
-TESTS =
-BUILT_SOURCES =
-CLEANFILES =
-EXTRA_DIST = SConscript
-
-
-EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README	\
-	glsl/TODO glsl/glcpp/README			\
-	glsl/glsl_lexer.ll				\
-	glsl/glsl_parser.yy				\
-	glsl/glcpp/glcpp-lex.l				\
-	glsl/glcpp/glcpp-parse.y			\
-	glsl/Makefile.sources				\
-	glsl/SConscript
-
-TESTS += glsl/glcpp/tests/glcpp-test			\
-	glsl/glcpp/tests/glcpp-test-cr-lf		\
-	glsl/tests/blob-test				\
-	glsl/tests/general-ir-test			\
-	glsl/tests/optimization-test			\
-	glsl/tests/sampler-types-test			\
-	glsl/tests/uniform-initializer-test
-
-TESTS_ENVIRONMENT= \
-	export PYTHON2=$(PYTHON2); \
-	export PYTHON_FLAGS=$(PYTHON_FLAGS);
-
-check_PROGRAMS +=					\
-	glsl/glcpp/glcpp				\
-	glsl/glsl_test					\
-	glsl/tests/blob-test				\
-	glsl/tests/general-ir-test			\
-	glsl/tests/sampler-types-test			\
-	glsl/tests/uniform-initializer-test
-
-noinst_PROGRAMS = glsl_compiler
-
-glsl_tests_blob_test_SOURCES =				\
-	glsl/tests/blob_test.c
-glsl_tests_blob_test_LDADD =				\
-	glsl/libglsl.la
-
-glsl_tests_general_ir_test_SOURCES =			\
-	glsl/standalone_scaffolding.cpp			\
-	glsl/tests/builtin_variable_test.cpp		\
-	glsl/tests/invalidate_locations_test.cpp	\
-	glsl/tests/general_ir_test.cpp			\
-	glsl/tests/varyings_test.cpp
-glsl_tests_general_ir_test_CFLAGS =			\
-	$(PTHREAD_CFLAGS)
-glsl_tests_general_ir_test_LDADD =			\
-	$(top_builddir)/src/gtest/libgtest.la		\
-	glsl/libglsl.la		\
-	$(top_builddir)/src/libglsl_util.la		\
-	$(PTHREAD_LIBS)
-
-glsl_tests_uniform_initializer_test_SOURCES =		\
-	glsl/tests/copy_constant_to_storage_tests.cpp	\
-	glsl/tests/set_uniform_initializer_tests.cpp	\
-	glsl/tests/uniform_initializer_utils.cpp	\
-	glsl/tests/uniform_initializer_utils.h
-glsl_tests_uniform_initializer_test_CFLAGS =		\
-	$(PTHREAD_CFLAGS)
-glsl_tests_uniform_initializer_test_LDADD =		\
-	$(top_builddir)/src/gtest/libgtest.la		\
-	glsl/libglsl.la		\
-	$(top_builddir)/src/libglsl_util.la		\
-	$(PTHREAD_LIBS)
-
-glsl_tests_sampler_types_test_SOURCES =			\
-	glsl/tests/sampler_types_test.cpp
-glsl_tests_sampler_types_test_CFLAGS =			\
-	$(PTHREAD_CFLAGS)
-glsl_tests_sampler_types_test_LDADD =			\
-	$(top_builddir)/src/gtest/libgtest.la		\
-	glsl/libglsl.la					\
-	$(top_builddir)/src/libglsl_util.la		\
-	$(PTHREAD_LIBS)
-
-noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la
-
-glsl_libglcpp_la_LIBADD =				\
-	$(top_builddir)/src/util/libmesautil.la
-glsl_libglcpp_la_SOURCES =				\
-	glsl/glcpp/glcpp-lex.c				\
-	glsl/glcpp/glcpp-parse.c			\
-	glsl/glcpp/glcpp-parse.h			\
-	$(LIBGLCPP_FILES)
-
-glsl_glcpp_glcpp_SOURCES =				\
-	glsl/glcpp/glcpp.c
-glsl_glcpp_glcpp_LDADD =				\
-	glsl/libglcpp.la	\
-	$(top_builddir)/src/libglsl_util.la		\
-	-lm
-
-glsl_libglsl_la_LIBADD = \
-	nir/libnir.la \
-	glsl/libglcpp.la
-
-glsl_libglsl_la_SOURCES =				\
-	glsl/glsl_lexer.cpp				\
-	glsl/glsl_parser.cpp				\
-	glsl/glsl_parser.h				\
-	$(LIBGLSL_FILES)
-
-
-glsl_compiler_SOURCES = \
-	$(GLSL_COMPILER_CXX_FILES)
-
-glsl_compiler_LDADD =					\
-	glsl/libglsl.la					\
-	$(top_builddir)/src/libglsl_util.la		\
-	$(top_builddir)/src/util/libmesautil.la		\
-	$(PTHREAD_LIBS)
-
-glsl_glsl_test_SOURCES = \
-	glsl/standalone_scaffolding.cpp \
-	glsl/test.cpp \
-	glsl/test_optpass.cpp \
-	glsl/test_optpass.h
-
-glsl_glsl_test_LDADD =					\
-	glsl/libglsl.la					\
-	$(top_builddir)/src/libglsl_util.la		\
-	$(PTHREAD_LIBS)
-
-# We write our own rules for yacc and lex below. We'd rather use automake,
-# but automake makes it especially difficult for a number of reasons:
-#
-#  * < automake-1.12 generates .h files from .yy and .ypp files, but
-#    >=automake-1.12 generates .hh and .hpp files respectively. There's no
-#    good way of making a project that uses C++ yacc files compatible with
-#    both versions of automake. Strong work automake developers.
-#
-#  * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
-#    we'd like the resulting generated code to also go in glcpp/ for purposes
-#    of distribution. Automake gives no way to do this.
-#
-#  * Since we're building multiple yacc parsers into one library (and via one
-#    Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes
-#    automake to name the resulting generated code as <library-name>_filename.c.
-#    Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
-
-# In order to make build output print "LEX" and "YACC", we reproduce the
-# automake variables below.
-
-AM_V_LEX = $(am__v_LEX_$(V))
-am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
-am__v_LEX_0 = @echo "  LEX     " $@;
-am__v_LEX_1 =
-
-AM_V_YACC = $(am__v_YACC_$(V))
-am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
-am__v_YACC_0 = @echo "  YACC    " $@;
-am__v_YACC_1 =
-
-MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
-YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
-LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
-
-glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
-	$(MKDIR_GEN)
-	$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy
-
-glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll
-	$(MKDIR_GEN)
-	$(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll
-
-glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y
-	$(MKDIR_GEN)
-	$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y
-
-glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l
-	$(MKDIR_GEN)
-	$(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l
-
-# Only the parsers (specifically the header files generated at the same time)
-# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
-# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files
-# YACC is only executed once for each parser. The rest of the generated code
-# will be created at the appropriate times according to standard automake
-# dependency rules.
-BUILT_SOURCES +=					\
-	glsl/glsl_parser.cpp				\
-	glsl/glsl_lexer.cpp				\
-	glsl/glcpp/glcpp-parse.c			\
-	glsl/glcpp/glcpp-lex.c
-CLEANFILES +=						\
-	glsl/glcpp/glcpp-parse.h			\
-	glsl/glsl_parser.h				\
-	glsl/glsl_parser.cpp				\
-	glsl/glsl_lexer.cpp				\
-	glsl/glcpp/glcpp-parse.c			\
-	glsl/glcpp/glcpp-lex.c
-
-clean-local:
-	$(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr
-
-dist-hook:
-	$(RM) glsl/glcpp/tests/*.out
-	$(RM) glsl/glcpp/tests/subtest*/*.out
-
-noinst_LTLIBRARIES += nir/libnir.la
-
-nir_libnir_la_CPPFLAGS = \
-	$(AM_CPPFLAGS) \
-	-I$(top_builddir)/src/compiler/nir \
-	-I$(top_srcdir)/src/compiler/nir
-
-nir_libnir_la_LIBADD = \
-	libcompiler.la
-
-nir_libnir_la_SOURCES =					\
-	$(NIR_FILES)					\
-	$(NIR_GENERATED_FILES)
-
-PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
-
-nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
-	$(MKDIR_GEN)
-	$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false)
-
-nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
-	$(MKDIR_GEN)
-	$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false)
-
-nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
-	$(MKDIR_GEN)
-	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false)
-
-nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
-	$(MKDIR_GEN)
-	$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false)
-
-nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
-	$(MKDIR_GEN)
-	$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false)
-
-
-check_PROGRAMS += nir/tests/control_flow_tests
-
-nir_tests_control_flow_tests_CPPFLAGS = \
-	$(AM_CPPFLAGS) \
-	-I$(top_builddir)/src/compiler/nir \
-	-I$(top_srcdir)/src/compiler/nir
-
-nir_tests_control_flow_tests_SOURCES =			\
-	nir/tests/control_flow_tests.cpp
-nir_tests_control_flow_tests_CFLAGS =			\
-	$(PTHREAD_CFLAGS)
-nir_tests_control_flow_tests_LDADD =			\
-	$(top_builddir)/src/gtest/libgtest.la		\
-	nir/libnir.la	\
-	$(top_builddir)/src/util/libmesautil.la		\
-	$(PTHREAD_LIBS)
-
-
-TESTS += nir/tests/control_flow_tests
-
-
-BUILT_SOURCES += $(NIR_GENERATED_FILES)
-CLEANFILES += $(NIR_GENERATED_FILES)
-
-EXTRA_DIST += \
-	nir/nir_algebraic.py				\
-	nir/nir_builder_opcodes_h.py			\
-	nir/nir_constant_expressions.py			\
-	nir/nir_opcodes.py				\
-	nir/nir_opcodes_c.py				\
-	nir/nir_opcodes_h.py				\
-	nir/nir_opt_algebraic.py			\
-	nir/tests					\
-	nir/Makefile.sources
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -1,226 +0,0 @@
-LIBCOMPILER_FILES = \
-	builtin_type_macros.h \
-	glsl_types.cpp \
-	glsl_types.h \
-	nir_types.cpp \
-	nir_types.h \
-	shader_enums.c \
-	shader_enums.h
-
-# libglsl
-
-LIBGLSL_FILES = \
-	glsl/ast.h \
-	glsl/ast_array_index.cpp \
-	glsl/ast_expr.cpp \
-	glsl/ast_function.cpp \
-	glsl/ast_to_hir.cpp \
-	glsl/ast_type.cpp \
-	glsl/blob.c \
-	glsl/blob.h \
-	glsl/builtin_functions.cpp \
-	glsl/builtin_types.cpp \
-	glsl/builtin_variables.cpp \
-	glsl/glsl_parser_extras.cpp \
-	glsl/glsl_parser_extras.h \
-	glsl/glsl_symbol_table.cpp \
-	glsl/glsl_symbol_table.h \
-	glsl/hir_field_selection.cpp \
-	glsl/ir_basic_block.cpp \
-	glsl/ir_basic_block.h \
-	glsl/ir_builder.cpp \
-	glsl/ir_builder.h \
-	glsl/ir_clone.cpp \
-	glsl/ir_constant_expression.cpp \
-	glsl/ir.cpp \
-	glsl/ir.h \
-	glsl/ir_equals.cpp \
-	glsl/ir_expression_flattening.cpp \
-	glsl/ir_expression_flattening.h \
-	glsl/ir_function_can_inline.cpp \
-	glsl/ir_function_detect_recursion.cpp \
-	glsl/ir_function_inlining.h \
-	glsl/ir_function.cpp \
-	glsl/ir_hierarchical_visitor.cpp \
-	glsl/ir_hierarchical_visitor.h \
-	glsl/ir_hv_accept.cpp \
-	glsl/ir_import_prototypes.cpp \
-	glsl/ir_optimization.h \
-	glsl/ir_print_visitor.cpp \
-	glsl/ir_print_visitor.h \
-	glsl/ir_reader.cpp \
-	glsl/ir_reader.h \
-	glsl/ir_rvalue_visitor.cpp \
-	glsl/ir_rvalue_visitor.h \
-	glsl/ir_set_program_inouts.cpp \
-	glsl/ir_uniform.h \
-	glsl/ir_validate.cpp \
-	glsl/ir_variable_refcount.cpp \
-	glsl/ir_variable_refcount.h \
-	glsl/ir_visitor.h \
-	glsl/linker.cpp \
-	glsl/linker.h \
-	glsl/link_atomics.cpp \
-	glsl/link_functions.cpp \
-	glsl/link_interface_blocks.cpp \
-	glsl/link_uniforms.cpp \
-	glsl/link_uniform_initializers.cpp \
-	glsl/link_uniform_block_active_visitor.cpp \
-	glsl/link_uniform_block_active_visitor.h \
-	glsl/link_uniform_blocks.cpp \
-	glsl/link_varyings.cpp \
-	glsl/link_varyings.h \
-	glsl/list.h \
-	glsl/loop_analysis.cpp \
-	glsl/loop_analysis.h \
-	glsl/loop_controls.cpp \
-	glsl/loop_unroll.cpp \
-	glsl/lower_buffer_access.cpp \
-	glsl/lower_buffer_access.h \
-	glsl/lower_clip_distance.cpp \
-	glsl/lower_const_arrays_to_uniforms.cpp \
-	glsl/lower_discard.cpp \
-	glsl/lower_discard_flow.cpp \
-	glsl/lower_if_to_cond_assign.cpp \
-	glsl/lower_instructions.cpp \
-	glsl/lower_jumps.cpp \
-	glsl/lower_mat_op_to_vec.cpp \
-	glsl/lower_noise.cpp \
-	glsl/lower_offset_array.cpp \
-	glsl/lower_packed_varyings.cpp \
-	glsl/lower_named_interface_blocks.cpp \
-	glsl/lower_packing_builtins.cpp \
-	glsl/lower_subroutine.cpp \
-	glsl/lower_tess_level.cpp \
-	glsl/lower_texture_projection.cpp \
-	glsl/lower_variable_index_to_cond_assign.cpp \
-	glsl/lower_vec_index_to_cond_assign.cpp \
-	glsl/lower_vec_index_to_swizzle.cpp \
-	glsl/lower_vector.cpp \
-	glsl/lower_vector_derefs.cpp \
-	glsl/lower_vector_insert.cpp \
-	glsl/lower_vertex_id.cpp \
-	glsl/lower_output_reads.cpp \
-	glsl/lower_shared_reference.cpp \
-	glsl/lower_ubo_reference.cpp \
-	glsl/opt_algebraic.cpp \
-	glsl/opt_array_splitting.cpp \
-	glsl/opt_conditional_discard.cpp \
-	glsl/opt_constant_folding.cpp \
-	glsl/opt_constant_propagation.cpp \
-	glsl/opt_constant_variable.cpp \
-	glsl/opt_copy_propagation.cpp \
-	glsl/opt_copy_propagation_elements.cpp \
-	glsl/opt_dead_builtin_variables.cpp \
-	glsl/opt_dead_builtin_varyings.cpp \
-	glsl/opt_dead_code.cpp \
-	glsl/opt_dead_code_local.cpp \
-	glsl/opt_dead_functions.cpp \
-	glsl/opt_flatten_nested_if_blocks.cpp \
-	glsl/opt_flip_matrices.cpp \
-	glsl/opt_function_inlining.cpp \
-	glsl/opt_if_simplification.cpp \
-	glsl/opt_minmax.cpp \
-	glsl/opt_noop_swizzle.cpp \
-	glsl/opt_rebalance_tree.cpp \
-	glsl/opt_redundant_jumps.cpp \
-	glsl/opt_structure_splitting.cpp \
-	glsl/opt_swizzle_swizzle.cpp \
-	glsl/opt_tree_grafting.cpp \
-	glsl/opt_vectorize.cpp \
-	glsl/program.h \
-	glsl/s_expression.cpp \
-	glsl/s_expression.h
-
-# glsl_compiler
-
-GLSL_COMPILER_CXX_FILES = \
-	glsl/standalone_scaffolding.cpp \
-	glsl/standalone_scaffolding.h \
-	glsl/main.cpp
-
-# libglsl generated sources
-LIBGLSL_GENERATED_CXX_FILES = \
-	glsl/glsl_lexer.cpp \
-	glsl/glsl_parser.cpp
-
-# libglcpp
-
-LIBGLCPP_FILES = \
-	glsl/glcpp/glcpp.h \
-	glsl/glcpp/pp.c
-
-LIBGLCPP_GENERATED_FILES = \
-	glsl/glcpp/glcpp-lex.c \
-	glsl/glcpp/glcpp-parse.c
-
-NIR_GENERATED_FILES = \
-	nir/nir_builder_opcodes.h \
-	nir/nir_constant_expressions.c \
-	nir/nir_opcodes.c \
-	nir/nir_opcodes.h \
-	nir/nir_opt_algebraic.c
-
-NIR_FILES = \
-	nir/glsl_to_nir.cpp \
-	nir/glsl_to_nir.h \
-	nir/nir.c \
-	nir/nir.h \
-	nir/nir_array.h \
-	nir/nir_builder.h \
-	nir/nir_clone.c \
-	nir/nir_constant_expressions.h \
-	nir/nir_control_flow.c \
-	nir/nir_control_flow.h \
-	nir/nir_control_flow_private.h \
-	nir/nir_dominance.c \
-	nir/nir_from_ssa.c \
-	nir/nir_gs_count_vertices.c \
-	nir/nir_intrinsics.c \
-	nir/nir_intrinsics.h \
-	nir/nir_instr_set.c \
-	nir/nir_instr_set.h \
-	nir/nir_liveness.c \
-	nir/nir_lower_alu_to_scalar.c \
-	nir/nir_lower_atomics.c \
-	nir/nir_lower_clip.c \
-	nir/nir_lower_global_vars_to_local.c \
-	nir/nir_lower_gs_intrinsics.c \
-	nir/nir_lower_load_const_to_scalar.c \
-	nir/nir_lower_locals_to_regs.c \
-	nir/nir_lower_idiv.c \
-	nir/nir_lower_io.c \
-	nir/nir_lower_outputs_to_temporaries.c \
-	nir/nir_lower_phis_to_scalar.c \
-	nir/nir_lower_samplers.c \
-	nir/nir_lower_system_values.c \
-	nir/nir_lower_tex.c \
-	nir/nir_lower_to_source_mods.c \
-	nir/nir_lower_two_sided_color.c \
-	nir/nir_lower_vars_to_ssa.c \
-	nir/nir_lower_var_copies.c \
-	nir/nir_lower_vec_to_movs.c \
-	nir/nir_metadata.c \
-	nir/nir_move_vec_src_uses_to_dest.c \
-	nir/nir_normalize_cubemap_coords.c \
-	nir/nir_opt_constant_folding.c \
-	nir/nir_opt_copy_propagate.c \
-	nir/nir_opt_cse.c \
-	nir/nir_opt_dce.c \
-	nir/nir_opt_dead_cf.c \
-	nir/nir_opt_gcm.c \
-	nir/nir_opt_global_to_local.c \
-	nir/nir_opt_peephole_select.c \
-	nir/nir_opt_remove_phis.c \
-	nir/nir_opt_undef.c \
-	nir/nir_print.c \
-	nir/nir_remove_dead_variables.c \
-	nir/nir_search.c \
-	nir/nir_search.h \
-	nir/nir_split_var_copies.c \
-	nir/nir_sweep.c \
-	nir/nir_to_ssa.c \
-	nir/nir_validate.c \
-	nir/nir_vla.h \
-	nir/nir_worklist.c \
-	nir/nir_worklist.h
--- a/src/compiler/SConscript
+++ b/src/compiler/SConscript
@@ -1,24 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.MSVC2013Compat()
-
-env.Prepend(CPPPATH = [
-    '#include',
-    '#src',
-    '#src/mapi',
-    '#src/mesa',
-    '#src/gallium/include',
-    '#src/gallium/auxiliary',
-])
-
-sources = env.ParseSourceList('Makefile.sources', 'LIBCOMPILER_FILES')
-
-compiler = env.ConvenienceLibrary(
-    target = 'compiler',
-    source = sources
-)
-Export('compiler')
-
-SConscript('glsl/SConscript')
--- a/src/compiler/glsl/Android.gen.mk
+++ b/src/compiler/glsl/Android.gen.mk
@@ -1,76 +0,0 @@
-# Mesa 3-D graphics library
-#
-# Copyright (C) 2010-2011 Chia-I Wu <olvaffe@gmail.com>
-# Copyright (C) 2010-2011 LunarG Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# included by glsl Android.mk for source generation
-
-ifeq ($(LOCAL_MODULE_CLASS),)
-LOCAL_MODULE_CLASS := STATIC_LIBRARIES
-endif
-
-intermediates := $(call local-generated-sources-dir)
-
-LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
-
-LOCAL_C_INCLUDES += \
-	$(intermediates)/glcpp \
-	$(MESA_TOP)/src/glsl/glcpp \
-
-LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
-	$(LIBGLCPP_GENERATED_FILES) \
-	$(LIBGLSL_GENERATED_CXX_FILES))
-
-define local-l-or-ll-to-c-or-cpp
-	@mkdir -p $(dir $@)
-	@echo "Mesa Lex: $(PRIVATE_MODULE) <= $<"
-	$(hide) $(LEX) --nounistd -o$@ $<
-endef
-
-define glsl_local-y-to-c-and-h
-	@mkdir -p $(dir $@)
-	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
-	$(hide) $(YACC) -o $@ -p "glcpp_parser_" $<
-endef
-
-define local-yy-to-cpp-and-h
-	@mkdir -p $(dir $@)
-	@echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<"
-	$(hide) $(YACC) -p "_mesa_glsl_" -o $@ $<
-	touch $(@:$1=$(YACC_HEADER_SUFFIX))
-	echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h)
-	echo '#define '$(@F:$1=_h) >> $(@:$1=.h)
-	cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h)
-	echo '#endif' >> $(@:$1=.h)
-	rm -f $(@:$1=$(YACC_HEADER_SUFFIX))
-endef
-
-$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll
-	$(call local-l-or-ll-to-c-or-cpp)
-
-$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy
-	$(call local-yy-to-cpp-and-h,.cpp)
-
-$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l
-	$(call local-l-or-ll-to-c-or-cpp)
-
-$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y
-	$(call glsl_local-y-to-c-and-h)
--- a/src/compiler/glsl/lower_buffer_access.cpp
+++ b/src/compiler/glsl/lower_buffer_access.cpp
@@ -1,494 +0,0 @@
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_buffer_access.cpp
- *
- * Helper for IR lowering pass to replace dereferences of buffer object based
- * shader variables with intrinsic function calls.
- *
- * This helper is used by lowering passes for UBOs, SSBOs and compute shader
- * shared variables.
- */
-
-#include "lower_buffer_access.h"
-#include "ir_builder.h"
-#include "main/macros.h"
-#include "util/list.h"
-#include "glsl_parser_extras.h"
-
-using namespace ir_builder;
-
-namespace lower_buffer_access {
-
-static inline int
-writemask_for_size(unsigned n)
-{
-   return ((1 << n) - 1); /* Mask with the n low-order bits set, e.g. n == 3 gives 0x7. */
-}
-
-/**
- * Takes a deref and recursively calls itself to break the deref down to the
- * point that the reads or writes generated are contiguous scalars or vectors.
- *
- * Records are split per field, arrays per element and matrices per column;
- * every scalar/vector leaf is handed to insert_buffer_access().
- *
- * \param mem_ctx        context used for every IR node allocated here
- * \param is_write       true when emitting stores, false when emitting loads
- * \param deref          dereference to break down; it is cloned for each
- *                       field/element/column sub-dereference
- * \param base_offset    variable added to the constant offset of each access
- * \param deref_offset   constant offset of \c deref, accumulated while
- *                       recursing
- * \param row_major      selects the per-channel row-major path at the leaves
- * \param matrix_columns column count used to compute the row-major stride
- * \param packing        compared against GLSL_INTERFACE_PACKING_STD430 to
- *                       select std430 strides
- * \param write_mask     channels to write; only consulted when \c is_write
- */
-void
-lower_buffer_access::emit_access(void *mem_ctx,
-                                 bool is_write,
-                                 ir_dereference *deref,
-                                 ir_variable *base_offset,
-                                 unsigned int deref_offset,
-                                 bool row_major,
-                                 int matrix_columns,
-                                 unsigned int packing,
-                                 unsigned int write_mask)
-{
-   if (deref->type->is_record()) {
-      unsigned int field_offset = 0; /* Running offset of the current field
-                                      * inside the record. */
-
-      for (unsigned i = 0; i < deref->type->length; i++) {
-         const struct glsl_struct_field *field =
-            &deref->type->fields.structure[i];
-         ir_dereference *field_deref =
-            new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
-                                               field->name);
-
-         field_offset =
-            glsl_align(field_offset,
-                       field->type->std140_base_alignment(row_major));
-         /* NOTE(review): the alignment above and the size added below always
-          * use the std140 helpers, even when packing is
-          * GLSL_INTERFACE_PACKING_STD430; the array branch does honor std430.
-          * Confirm whether records should consult packing as well.
-          */
-
-         emit_access(mem_ctx, is_write, field_deref, base_offset,
-                     deref_offset + field_offset,
-                     row_major, 1, packing,
-                     writemask_for_size(field_deref->type->vector_elements));
-
-         field_offset += field->type->std140_size(row_major);
-      }
-      return;
-   }
-
-   if (deref->type->is_array()) {
-      /* std430 uses the element's own array stride; otherwise the std140
-       * element size is rounded up to a multiple of 16.
-       */
-      unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ?
-         deref->type->fields.array->std430_array_stride(row_major) :
-         glsl_align(deref->type->fields.array->std140_size(row_major), 16);
-
-      for (unsigned i = 0; i < deref->type->length; i++) {
-         ir_constant *element = new(mem_ctx) ir_constant(i);
-         ir_dereference *element_deref =
-            new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
-                                              element);
-         emit_access(mem_ctx, is_write, element_deref, base_offset,
-                     deref_offset + i * array_stride,
-                     row_major, 1, packing,
-                     writemask_for_size(element_deref->type->vector_elements));
-      }
-      return;
-   }
-
-   if (deref->type->is_matrix()) {
-      /* One recursive call per column; the matrix's column count is passed
-       * down so the leaf code can compute the row-major stride.
-       */
-      for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
-         ir_constant *col = new(mem_ctx) ir_constant(i);
-         ir_dereference *col_deref =
-            new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col);
-
-         if (row_major) {
-            /* For a row-major matrix, the next column starts at the next
-             * element.
-             */
-            int size_mul = deref->type->is_double() ? 8 : 4;
-            emit_access(mem_ctx, is_write, col_deref, base_offset,
-                        deref_offset + i * size_mul,
-                        row_major, deref->type->matrix_columns, packing,
-                        writemask_for_size(col_deref->type->vector_elements));
-         } else {
-            int size_mul;
-
-            /* std430 doesn't round up vec2 size to a vec4 size */
-            if (packing == GLSL_INTERFACE_PACKING_STD430 &&
-                deref->type->vector_elements == 2 &&
-                !deref->type->is_double()) {
-               size_mul = 8;
-            } else {
-               /* std140 always rounds the stride of arrays (and matrices) to a
-                * vec4, so matrices are always 16 between columns/rows. With
-                * doubles, they will be 32 apart when there are more than 2 rows.
-                *
-                * For both std140 and std430, if the member is a
-                * three-component vector with components consuming N basic
-                * machine units, the base alignment is 4N. For vec4, base
-                * alignment is 4N.
-                */
-               size_mul = (deref->type->is_double() &&
-                           deref->type->vector_elements > 2) ? 32 : 16;
-            }
-
-            emit_access(mem_ctx, is_write, col_deref, base_offset,
-                        deref_offset + i * size_mul,
-                        row_major, deref->type->matrix_columns, packing,
-                        writemask_for_size(col_deref->type->vector_elements));
-         }
-      }
-      return;
-   }
-
-   assert(deref->type->is_scalar() || deref->type->is_vector());
-
-   if (!row_major) {
-      /* Contiguous leaf: a single access at base_offset + deref_offset.
-       * Loads use a mask covering every component of the type; stores use
-       * the caller's write_mask.  The channel argument is -1 here, versus
-       * the channel index in the row-major path below.
-       */
-      ir_rvalue *offset =
-         add(base_offset, new(mem_ctx) ir_constant(deref_offset));
-      unsigned mask =
-         is_write ? write_mask : (1 << deref->type->vector_elements) - 1;
-      insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1);
-   } else {
-      unsigned N = deref->type->is_double() ? 8 : 4; /* Size of one component. */
-
-      /* We're dereffing a column out of a row-major matrix, so we
-       * gather the vector from each stored row.
-      */
-      assert(deref->type->base_type == GLSL_TYPE_FLOAT ||
-             deref->type->base_type == GLSL_TYPE_DOUBLE);
-      /* Matrices, row_major or not, are stored as if they were
-       * arrays of vectors of the appropriate size in std140.
-       * Arrays have their strides rounded up to a vec4, so the
-       * matrix stride is always 16. However a double matrix may either be 16
-       * or 32 depending on the number of columns.
-       */
-      assert(matrix_columns <= 4);
-      unsigned matrix_stride = 0;
-      /* Matrix stride for std430 mat2xY matrices are not rounded up to
-       * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform
-       * Block Layout":
-       *
-       * "2. If the member is a two- or four-component vector with components
-       * consuming N basic machine units, the base alignment is 2N or 4N,
-       * respectively." [...]
-       * "4. If the member is an array of scalars or vectors, the base alignment
-       * and array stride are set to match the base alignment of a single array
-       * element, according to rules (1), (2), and (3), and rounded up to the
-       * base alignment of a vec4." [...]
-       * "7. If the member is a row-major matrix with C columns and R rows, the
-       * matrix is stored identically to an array of R row vectors with C
-       * components each, according to rule (4)." [...]
-       * "When using the std430 storage layout, shader storage blocks will be
-       * laid out in buffer storage identically to uniform and shader storage
-       * blocks using the std140 layout, except that the base alignment and
-       * stride of arrays of scalars and vectors in rule 4 and of structures in
-       * rule 9 are not rounded up a multiple of the base alignment of a vec4."
-       */
-      if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2)
-         matrix_stride = 2 * N;
-      else
-         matrix_stride = glsl_align(matrix_columns * N, 16);
-
-      /* Each access below is a single scalar of the matrix's base type. */
-      const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ?
-         glsl_type::float_type : glsl_type::double_type;
-
-      for (unsigned i = 0; i < deref->type->vector_elements; i++) {
-         ir_rvalue *chan_offset =
-            add(base_offset,
-                new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
-         /* Loads touch every channel; stores only the channels that are set
-          * in write_mask.
-          */
-         if (!is_write || ((1U << i) & write_mask))
-            insert_buffer_access(mem_ctx, deref, deref_type, chan_offset,
-                                 (1U << i), i);
-      }
-   }
-}
-
-/**
- * Determine if a thing being dereferenced is row-major
- *
- * There is some trickery here.
- *
- * If the thing being dereferenced is a member of uniform block \b without an
- * instance name, then the name of the \c ir_variable is the field name of an
- * interface type.  If this field is row-major, then the thing referenced is
- * row-major.
- *
- * If the thing being dereferenced is a member of uniform block \b with an
- * instance name, then the last dereference in the tree will be an
- * \c ir_dereference_record.  If that record field is row-major, then the
- * thing referenced is row-major.
- *
- * The walk starts at \c deref and follows array and record dereferences
- * toward the variable.  \c matrix records whether any type seen on the way
- * (ignoring array-ness) was a matrix.  An explicit column-major layout yields
- * false; an explicit row-major layout yields true only if a matrix was seen
- * or the type of the original \c deref (ignoring array-ness) is a record.
- * Any other kind of IR node ends the walk with false.
- */
-bool
-lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref)
-{
-   bool matrix = false;
-   const ir_rvalue *ir = deref;
-
-   while (true) {
-      matrix = matrix || ir->type->without_array()->is_matrix();
-
-      switch (ir->ir_type) {
-      case ir_type_dereference_array: {
-         const ir_dereference_array *const array_deref =
-            (const ir_dereference_array *) ir;
-
-         ir = array_deref->array; /* Step to the thing being indexed. */
-         break;
-      }
-
-      case ir_type_dereference_record: {
-         const ir_dereference_record *const record_deref =
-            (const ir_dereference_record *) ir;
-
-         ir = record_deref->record; /* From here on, ir is the containing record. */
-
-         const int idx = ir->type->field_index(record_deref->field);
-         assert(idx >= 0);
-
-         const enum glsl_matrix_layout matrix_layout =
-            glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout);
-
-         switch (matrix_layout) {
-         case GLSL_MATRIX_LAYOUT_INHERITED:
-            break; /* No explicit layout on this field: keep walking. */
-         case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
-            return false;
-         case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
-            return matrix || deref->type->without_array()->is_record();
-         }
-
-         break;
-      }
-
-      case ir_type_dereference_variable: {
-         const ir_dereference_variable *const var_deref =
-            (const ir_dereference_variable *) ir;
-
-         const enum glsl_matrix_layout matrix_layout =
-            glsl_matrix_layout(var_deref->var->data.matrix_layout);
-
-         switch (matrix_layout) {
-         case GLSL_MATRIX_LAYOUT_INHERITED: {
-            /* For interface block matrix variables we handle inherited
-             * layouts at HIR generation time, but we don't do that for shared
-             * variables, which are always column-major
-             */
-            ir_variable *var = deref->variable_referenced();
-            assert((var->is_in_buffer_block() && !matrix) ||
-                   var->data.mode == ir_var_shader_shared);
-            return false;
-         }
-         case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
-            return false;
-         case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
-            return matrix || deref->type->without_array()->is_record();
-         }
-
-         unreachable("invalid matrix layout");
-         break;
-      }
-
-      default:
-         return false;
-      }
-   }
-
-   /* The tree must have ended with a dereference that wasn't an
-    * ir_dereference_variable.  That is invalid, and it should be impossible.
-    *
-    * NOTE(review): every path through the loop above either continues the
-    * walk or returns, so normal control flow does not reach this point.
-    */
-   unreachable("invalid dereference tree");
-   return false;
-}
-
-/**
- * This function initializes various values that will be used later by
- * emit_access when actually emitting loads or stores.
- *
- * Note: const_offset is an input as well as an output, clients must
- * initialize it to the offset of the variable in the underlying block, and
- * this function will adjust it by adding the constant offset of the member
- * being accessed into that variable.
- *
- * \param mem_ctx        context used for IR nodes allocated here
- * \param var            not referenced in the body of this function
- * \param deref          dereference chain to walk back toward its variable
- * \param offset         out: expression for the non-constant part of the
- *                       offset; starts as the constant 0u
- * \param const_offset   in/out: see the note above
- * \param row_major      out: result of is_dereferenced_thing_row_major(deref)
- * \param matrix_columns out: 1, unless a row-major matrix is indexed, in
- *                       which case the column count of that matrix
- * \param struct_field   optional out (may be NULL): set to the matching field
- *                       for each record dereference visited
- * \param packing        compared against GLSL_INTERFACE_PACKING_STD430 to
- *                       select std430 rather than std140 sizes and alignments
- */
-void
-lower_buffer_access::setup_buffer_access(void *mem_ctx,
-                                         ir_variable *var,
-                                         ir_rvalue *deref,
-                                         ir_rvalue **offset,
-                                         unsigned *const_offset,
-                                         bool *row_major,
-                                         int *matrix_columns,
-                                         const glsl_struct_field **struct_field,
-                                         unsigned packing)
-{
-   *offset = new(mem_ctx) ir_constant(0u);
-   *row_major = is_dereferenced_thing_row_major(deref);
-   *matrix_columns = 1;
-
-   /* Calculate the offset to the start of the region of the UBO
-    * dereferenced by deref.  This may be a variable offset if an
-    * array dereference has a variable index.
-    */
-   while (deref) {
-      switch (deref->ir_type) {
-      case ir_type_dereference_variable: {
-         deref = NULL; /* Reached the variable: the walk is done. */
-         break;
-      }
-
-      case ir_type_dereference_array: {
-         ir_dereference_array *deref_array = (ir_dereference_array *) deref;
-         unsigned array_stride;
-         if (deref_array->array->type->is_vector()) {
-            /* We get this when storing or loading a component out of a vector
-             * with a non-constant index. This happens for v[i] = f where v is
-             * a vector (or m[i][j] = f where m is a matrix). If we don't
-             * lower that here, it gets turned into v = vector_insert(v, i,
-             * f), which loads the entire vector, modifies one component and
-             * then writes the entire thing back.  That breaks if another
-             * thread or SIMD channel is modifying the same vector.
-             */
-            array_stride = 4;
-            if (deref_array->array->type->is_double())
-               array_stride *= 2;
-         } else if (deref_array->array->type->is_matrix() && *row_major) {
-            /* When loading a vector out of a row major matrix, the
-             * step between the columns (vectors) is the size of a
-             * float, while the step between the rows (elements of a
-             * vector) is handled later by emit_access.
-             */
-            array_stride = 4;
-            if (deref_array->array->type->is_double())
-               array_stride *= 2;
-            *matrix_columns = deref_array->array->type->matrix_columns;
-         } else if (deref_array->type->without_array()->is_interface()) {
-            /* We're processing an array dereference of an interface instance
-             * array. The thing being dereferenced *must* be a variable
-             * dereference because interfaces cannot be embedded in other
-             * types. In terms of calculating the offsets for the lowering
-             * pass, we don't care about the array index. All elements of an
-             * interface instance array will have the same offsets relative to
-             * the base of the block that backs them.
-             */
-            deref = deref_array->array->as_dereference();
-            break;
-         } else {
-            /* Whether or not the field is row-major (because it might be a
-             * bvec2 or something) does not affect the array itself. We need
-             * to know whether an array element in its entirety is row-major.
-             */
-            const bool array_row_major =
-               is_dereferenced_thing_row_major(deref_array);
-
-            /* The array type will give the correct interface packing
-             * information
-             */
-            if (packing == GLSL_INTERFACE_PACKING_STD430) {
-               array_stride = deref_array->type->std430_array_stride(array_row_major);
-            } else {
-               array_stride = deref_array->type->std140_size(array_row_major);
-               array_stride = glsl_align(array_stride, 16);
-            }
-         }
-
-         /* Signed indices are converted so the offset arithmetic is done on
-          * unsigned values.
-          */
-         ir_rvalue *array_index = deref_array->array_index;
-         if (array_index->type->base_type == GLSL_TYPE_INT)
-            array_index = i2u(array_index);
-
-         /* A constant index folds into *const_offset; anything else is added
-          * to the *offset expression as index * stride.
-          */
-         ir_constant *const_index =
-            array_index->constant_expression_value(NULL);
-         if (const_index) {
-            *const_offset += array_stride * const_index->value.u[0];
-         } else {
-            *offset = add(*offset,
-                          mul(array_index,
-                              new(mem_ctx) ir_constant(array_stride)));
-         }
-         deref = deref_array->array->as_dereference();
-         break;
-      }
-
-      case ir_type_dereference_record: {
-         ir_dereference_record *deref_record = (ir_dereference_record *) deref;
-         const glsl_type *struct_type = deref_record->record->type;
-         unsigned intra_struct_offset = 0; /* Offset of the accessed field in the record. */
-
-         for (unsigned int i = 0; i < struct_type->length; i++) {
-            const glsl_type *type = struct_type->fields.structure[i].type;
-
-            /* Temporary dereference of field i, built only to query whether
-             * that field is row-major; it is freed right after the query.
-             */
-            ir_dereference_record *field_deref = new(mem_ctx)
-               ir_dereference_record(deref_record->record,
-                                     struct_type->fields.structure[i].name);
-            const bool field_row_major =
-               is_dereferenced_thing_row_major(field_deref);
-
-            ralloc_free(field_deref);
-
-            unsigned field_align = 0;
-
-            if (packing == GLSL_INTERFACE_PACKING_STD430)
-               field_align = type->std430_base_alignment(field_row_major);
-            else
-               field_align = type->std140_base_alignment(field_row_major);
-
-            intra_struct_offset = glsl_align(intra_struct_offset, field_align);
-
-            /* Stop at the accessed field: intra_struct_offset is now its
-             * aligned offset.
-             */
-            if (strcmp(struct_type->fields.structure[i].name,
-                       deref_record->field) == 0) {
-               if (struct_field)
-                  *struct_field = &struct_type->fields.structure[i];
-               break;
-            }
-
-            if (packing == GLSL_INTERFACE_PACKING_STD430)
-               intra_struct_offset += type->std430_size(field_row_major);
-            else
-               intra_struct_offset += type->std140_size(field_row_major);
-
-            /* If the field just examined was itself a structure, apply rule
-             * #9:
-             *
-             *     "The structure may have padding at the end; the base offset
-             *     of the member following the sub-structure is rounded up to
-             *     the next multiple of the base alignment of the structure."
-             */
-            if (type->without_array()->is_record()) {
-               intra_struct_offset = glsl_align(intra_struct_offset,
-                                                field_align);
-
-            }
-         }
-
-         *const_offset += intra_struct_offset;
-         deref = deref_record->record->as_dereference();
-         break;
-      }
-
-      case ir_type_swizzle: {
-         ir_swizzle *deref_swizzle = (ir_swizzle *) deref;
-
-         assert(deref_swizzle->mask.num_components == 1);
-
-         /* NOTE(review): the component offset below uses sizeof(int) for
-          * every type, while the vector-index case above doubles its stride
-          * for doubles.  Confirm whether double swizzles can reach this case.
-          */
-         *const_offset += deref_swizzle->mask.x * sizeof(int);
-         deref = deref_swizzle->val->as_dereference();
-         break;
-      }
-
-      default:
-         assert(!"not reached");
-         deref = NULL;
-         break;
-      }
-   }
-}
-
-} /* namespace lower_buffer_access */
--- a/src/compiler/glsl/lower_buffer_access.h
+++ b/src/compiler/glsl/lower_buffer_access.h
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_buffer_access.h
- *
- * Helper for IR lowering pass to replace dereferences of buffer object based
- * shader variables with intrinsic function calls.
- *
- * This helper is used by lowering passes for UBOs, SSBOs and compute shader
- * shared variables.
- */
-
-#pragma once
-#ifndef LOWER_BUFFER_ACCESS_H
-#define LOWER_BUFFER_ACCESS_H
-
-#include "ir.h"
-#include "ir_rvalue_visitor.h"
-
-namespace lower_buffer_access {
-
-class lower_buffer_access : public ir_rvalue_enter_visitor { /* Shared helper base for the buffer-access lowering passes. */
-public:
-   virtual void
-   insert_buffer_access(void *mem_ctx, ir_dereference *deref,
-                        const glsl_type *type, ir_rvalue *offset,
-                        unsigned mask, int channel) = 0; /* Pure virtual: emit_access() calls this for each scalar/vector access it generates. */
-
-   void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref,
-                    ir_variable *base_offset, unsigned int deref_offset,
-                    bool row_major, int matrix_columns,
-                    unsigned int packing, unsigned int write_mask); /* Recursively splits deref into scalar/vector accesses. */
-
-   bool is_dereferenced_thing_row_major(const ir_rvalue *deref); /* True when the dereferenced matrix/record has row-major layout. */
-
-   void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref,
-                            ir_rvalue **offset, unsigned *const_offset,
-                            bool *row_major, int *matrix_columns,
-                            const glsl_struct_field **struct_field,
-                            unsigned packing); /* Computes the offsets and layout flags later passed to emit_access(). */
-};
-
-} /* namespace lower_buffer_access */
-
-#endif /* LOWER_BUFFER_ACCESS_H */
--- a/src/compiler/glsl/lower_shared_reference.cpp
+++ b/src/compiler/glsl/lower_shared_reference.cpp
@@ -1,496 +0,0 @@
-/*
- * Copyright (c) 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_shared_reference.cpp
- *
- * IR lower pass to replace dereferences of compute shader shared variables
- * with intrinsic function calls.
- *
- * This relieves drivers of the responsibility of allocating space for the
- * shared variables in the shared memory region.
- */
-
-#include "lower_buffer_access.h"
-#include "ir_builder.h"
-#include "main/macros.h"
-#include "util/list.h"
-#include "glsl_parser_extras.h"
-
-using namespace ir_builder;
-
-namespace {
-
-struct var_offset { /* One entry of the visitor's list of shared variables and their offsets. */
-   struct list_head node; /* Link in the visitor's var_offsets list. */
-   const ir_variable *var; /* Shared variable this entry describes. */
-   unsigned offset; /* Offset assigned to var by get_shared_offset(). */
-};
-
-class lower_shared_reference_visitor :
-      public lower_buffer_access::lower_buffer_access {
-public:
-
-   lower_shared_reference_visitor(struct gl_shader *shader)
-      : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) /* NOTE(review): progress and buffer_access_type are not initialized here; presumably set before first use, confirm. */
-   {
-      list_inithead(&var_offsets);
-   }
-
-   ~lower_shared_reference_visitor()
-   {
-      ralloc_free(list_ctx); /* Frees the context the var_offset entries are allocated from. */
-   }
-
-   enum {
-      shared_load_access,
-      shared_store_access,
-      shared_atomic_access,
-   } buffer_access_type; /* Kind of access being lowered; read by insert_buffer_access(). */
-
-   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
-                             const glsl_type *type, ir_rvalue *offset,
-                             unsigned mask, int channel);
-
-   void handle_rvalue(ir_rvalue **rvalue);
-   ir_visitor_status visit_enter(ir_assignment *ir);
-   void handle_assignment(ir_assignment *ir);
-
-   ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
-   ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
-   ir_visitor_status visit_enter(ir_call *ir);
-
-   unsigned get_shared_offset(const ir_variable *);
-
-   ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
-                        ir_rvalue *offset);
-   ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
-                         unsigned write_mask);
-
-   void *list_ctx; /* ralloc context owning the var_offsets entries. */
-   struct gl_shader *shader; /* Shader whose IR is being lowered. */
-   struct list_head var_offsets; /* List of struct var_offset, one per shared variable seen. */
-   unsigned shared_size; /* End offset of the shared variables laid out so far. */
-   bool progress; /* Set to true whenever a load or store was lowered. */
-};
-
-unsigned
-lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
-{ /* Returns the offset assigned to var, assigning a new one on first use. */
-   list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
-      if (var_entry->var == var)
-         return var_entry->offset; /* Already laid out: reuse its offset. */
-   }
-
-   struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
-   list_add(&new_entry->node, &var_offsets);
-   new_entry->var = var;
-
-   /* Place the variable at the current end of the shared area, rounded up to
-    * its std430 base alignment.
-    */
-   unsigned var_align = var->type->std430_base_alignment(false);
-   new_entry->offset = glsl_align(shared_size, var_align);
-
-   unsigned var_size = var->type->std430_size(false);
-   shared_size = new_entry->offset + var_size; /* The next variable starts after this one. */
-
-   return new_entry->offset;
-}
-
-void
-lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
-{ /* Replaces a read of a shared variable with loads into a temporary. */
-   if (!*rvalue)
-      return;
-
-   ir_dereference *deref = (*rvalue)->as_dereference();
-   if (!deref)
-      return;
-
-   ir_variable *var = deref->variable_referenced();
-   if (!var || var->data.mode != ir_var_shader_shared)
-      return; /* Only dereferences of shared variables are lowered. */
-
-   buffer_access_type = shared_load_access; /* Makes insert_buffer_access() emit loads. */
-
-   void *mem_ctx = ralloc_parent(shader->ir);
-
-   ir_rvalue *offset = NULL;
-   unsigned const_offset = get_shared_offset(var); /* Starts at the variable's own offset. */
-   bool row_major;
-   int matrix_columns;
-   assert(var->get_interface_type() == NULL);
-   const unsigned packing = GLSL_INTERFACE_PACKING_STD430; /* Shared variables are laid out with std430 rules here. */
-
-   setup_buffer_access(mem_ctx, var, deref,
-                       &offset, &const_offset,
-                       &row_major, &matrix_columns, NULL, packing);
-
-   /* Now that we've calculated the offset to the start of the
-    * dereference, walk over the type and emit loads into a temporary.
-    */
-   const glsl_type *type = (*rvalue)->type;
-   ir_variable *load_var = new(mem_ctx) ir_variable(type,
-                                                    "shared_load_temp",
-                                                    ir_var_temporary);
-   base_ir->insert_before(load_var);
-
-   /* The non-constant part of the offset is stored in its own temporary,
-    * which emit_access() uses as the base offset.
-    */
-   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
-                                                       "shared_load_temp_offset",
-                                                       ir_var_temporary);
-   base_ir->insert_before(load_offset);
-   base_ir->insert_before(assign(load_offset, offset));
-
-   deref = new(mem_ctx) ir_dereference_variable(load_var);
-
-   emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
-               matrix_columns, packing, 0);
-
-   *rvalue = deref; /* The original read now refers to the loaded temporary. */
-
-   progress = true;
-}
-
-void
-lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
-{ /* Redirects a write to a shared variable into a temporary and emits stores from it. */
-   if (!ir || !ir->lhs)
-      return;
-
-   ir_rvalue *rvalue = ir->lhs->as_rvalue();
-   if (!rvalue)
-      return;
-
-   ir_dereference *deref = ir->lhs->as_dereference();
-   if (!deref)
-      return;
-
-   ir_variable *var = ir->lhs->variable_referenced();
-   if (!var || var->data.mode != ir_var_shader_shared)
-      return; /* Only writes to shared variables are lowered. */
-
-   buffer_access_type = shared_store_access; /* Makes insert_buffer_access() emit stores. */
-
-   /* We have a write to a shared variable, so declare a temporary and rewrite
-    * the assignment so that the temporary is the LHS.
-    */
-   void *mem_ctx = ralloc_parent(shader->ir);
-
-   const glsl_type *type = rvalue->type;
-   ir_variable *store_var = new(mem_ctx) ir_variable(type,
-                                                     "shared_store_temp",
-                                                     ir_var_temporary);
-   base_ir->insert_before(store_var);
-   ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);
-
-   ir_rvalue *offset = NULL;
-   unsigned const_offset = get_shared_offset(var); /* Starts at the variable's own offset. */
-   bool row_major;
-   int matrix_columns;
-   assert(var->get_interface_type() == NULL);
-   const unsigned packing = GLSL_INTERFACE_PACKING_STD430;
-
-   /* deref still refers to the original LHS here, so the offsets are
-    * computed from the shared-variable dereference, not from the temporary.
-    */
-   setup_buffer_access(mem_ctx, var, deref,
-                       &offset, &const_offset,
-                       &row_major, &matrix_columns, NULL, packing);
-
-   deref = new(mem_ctx) ir_dereference_variable(store_var);
-
-   ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
-                                                        "shared_store_temp_offset",
-                                                        ir_var_temporary);
-   base_ir->insert_before(store_offset);
-   base_ir->insert_before(assign(store_offset, offset));
-
-   /* Now we have to write the value assigned to the temporary back to memory */
-   emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
-               matrix_columns, packing, ir->write_mask);
-
-   progress = true;
-}
-
-ir_visitor_status
-lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
-{
-   handle_assignment(ir); /* Lower a shared-variable LHS first, if there is one. */
-   return rvalue_visit(ir); /* Then continue with the rvalue visit of the assignment. */
-}
-
-void
-lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
-                                                     ir_dereference *deref,
-                                                     const glsl_type *type,
-                                                     ir_rvalue *offset,
-                                                     unsigned mask,
-                                                     int channel)
-{ /* Emits one shared load or store, depending on buffer_access_type; channel is not used here. */
-   if (buffer_access_type == shared_store_access) {
-      ir_call *store = shared_store(mem_ctx, deref, offset, mask);
-      base_ir->insert_after(store); /* Stores go after base_ir (presumably the assignment that fills the temporary). */
-   } else {
-      /* Every other access type takes the load path: call the load
-       * intrinsic, then copy a clone of its return dereference into deref.
-       */
-      ir_call *load = shared_load(mem_ctx, type, offset);
-      base_ir->insert_before(load);
-      ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
-      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
-                                    value));
-   }
-}
-
-static bool
-compute_shader_enabled(const _mesa_glsl_parse_state *state)
-{
-   return state->stage == MESA_SHADER_COMPUTE;
-}
-
-ir_call *
-lower_shared_reference_visitor::shared_store(void *mem_ctx,
-                                             ir_rvalue *deref,
-                                             ir_rvalue *offset,
-                                             unsigned write_mask)
-{
-   exec_list sig_params;
-
-   ir_variable *offset_ref = new(mem_ctx)
-      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
-   sig_params.push_tail(offset_ref);
-
-   ir_variable *val_ref = new(mem_ctx)
-      ir_variable(deref->type, "value" , ir_var_function_in);
-   sig_params.push_tail(val_ref);
-
-   ir_variable *writemask_ref = new(mem_ctx)
-      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
-   sig_params.push_tail(writemask_ref);
-
-   ir_function_signature *sig = new(mem_ctx)
-      ir_function_signature(glsl_type::void_type, compute_shader_enabled);
-   assert(sig);
-   sig->replace_parameters(&sig_params);
-   sig->is_intrinsic = true;
-
-   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
-   f->add_signature(sig);
-
-   exec_list call_params;
-   call_params.push_tail(offset->clone(mem_ctx, NULL));
-   call_params.push_tail(deref->clone(mem_ctx, NULL));
-   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
-   return new(mem_ctx) ir_call(sig, NULL, &call_params);
-}
-
-ir_call *
-lower_shared_reference_visitor::shared_load(void *mem_ctx,
-                                            const struct glsl_type *type,
-                                            ir_rvalue *offset)
-{
-   exec_list sig_params;
-
-   ir_variable *offset_ref = new(mem_ctx)
-      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
-   sig_params.push_tail(offset_ref);
-
-   ir_function_signature *sig =
-      new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
-   assert(sig);
-   sig->replace_parameters(&sig_params);
-   sig->is_intrinsic = true;
-
-   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
-   f->add_signature(sig);
-
-   ir_variable *result = new(mem_ctx)
-      ir_variable(type, "shared_load_result", ir_var_temporary);
-   base_ir->insert_before(result);
-   ir_dereference_variable *deref_result = new(mem_ctx)
-      ir_dereference_variable(result);
-
-   exec_list call_params;
-   call_params.push_tail(offset->clone(mem_ctx, NULL));
-
-   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
-}
-
-/* Lowers the intrinsic call to a new internal intrinsic that swaps the access
- * to the shared variable in the first parameter by an offset. This involves
- * creating the new internal intrinsic (i.e. the new function signature).
- */
-ir_call *
-lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
-{
-   /* Shared atomics usually have 2 parameters, the shared variable and an
-    * integer argument. The exception is CompSwap, that has an additional
-    * integer parameter.
-    */
-   int param_count = ir->actual_parameters.length();
-   assert(param_count == 2 || param_count == 3);
-
-   /* First argument must be a scalar integer shared variable */
-   exec_node *param = ir->actual_parameters.get_head();
-   ir_instruction *inst = (ir_instruction *) param;
-   assert(inst->ir_type == ir_type_dereference_variable ||
-          inst->ir_type == ir_type_dereference_array ||
-          inst->ir_type == ir_type_dereference_record ||
-          inst->ir_type == ir_type_swizzle);
-
-   ir_rvalue *deref = (ir_rvalue *) inst;
-   assert(deref->type->is_scalar() && deref->type->is_integer());
-
-   ir_variable *var = deref->variable_referenced();
-   assert(var);
-
-   /* Compute the offset to the start if the dereference
-    */
-   void *mem_ctx = ralloc_parent(shader->ir);
-
-   ir_rvalue *offset = NULL;
-   unsigned const_offset = get_shared_offset(var);
-   bool row_major;
-   int matrix_columns;
-   assert(var->get_interface_type() == NULL);
-   const unsigned packing = GLSL_INTERFACE_PACKING_STD430;
-   buffer_access_type = shared_atomic_access;
-
-   setup_buffer_access(mem_ctx, var, deref,
-                       &offset, &const_offset,
-                       &row_major, &matrix_columns, NULL, packing);
-
-   assert(offset);
-   assert(!row_major);
-   assert(matrix_columns == 1);
-
-   ir_rvalue *deref_offset =
-      add(offset, new(mem_ctx) ir_constant(const_offset));
-
-   /* Create the new internal function signature that will take an offset
-    * instead of a shared variable
-    */
-   exec_list sig_params;
-   ir_variable *sig_param = new(mem_ctx)
-      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
-   sig_params.push_tail(sig_param);
-
-   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
-      glsl_type::int_type : glsl_type::uint_type;
-   sig_param = new(mem_ctx)
-         ir_variable(type, "data1", ir_var_function_in);
-   sig_params.push_tail(sig_param);
-
-   if (param_count == 3) {
-      sig_param = new(mem_ctx)
-            ir_variable(type, "data2", ir_var_function_in);
-      sig_params.push_tail(sig_param);
-   }
-
-   ir_function_signature *sig =
-      new(mem_ctx) ir_function_signature(deref->type,
-                                         compute_shader_enabled);
-   assert(sig);
-   sig->replace_parameters(&sig_params);
-   sig->is_intrinsic = true;
-
-   char func_name[64];
-   sprintf(func_name, "%s_shared", ir->callee_name());
-   ir_function *f = new(mem_ctx) ir_function(func_name);
-   f->add_signature(sig);
-
-   /* Now, create the call to the internal intrinsic */
-   exec_list call_params;
-   call_params.push_tail(deref_offset);
-   param = ir->actual_parameters.get_head()->get_next();
-   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
-   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
-   if (param_count == 3) {
-      param = param->get_next();
-      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
-      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
-   }
-   ir_dereference_variable *return_deref =
-      ir->return_deref->clone(mem_ctx, NULL);
-   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
-}
-
-ir_call *
-lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
-{
-   exec_list& params = ir->actual_parameters;
-
-   if (params.length() < 2 || params.length() > 3)
-      return ir;
-
-   ir_rvalue *rvalue =
-      ((ir_instruction *) params.get_head())->as_rvalue();
-   if (!rvalue)
-      return ir;
-
-   ir_variable *var = rvalue->variable_referenced();
-   if (!var || var->data.mode != ir_var_shader_shared)
-      return ir;
-
-   const char *callee = ir->callee_name();
-   if (!strcmp("__intrinsic_atomic_add", callee) ||
-       !strcmp("__intrinsic_atomic_min", callee) ||
-       !strcmp("__intrinsic_atomic_max", callee) ||
-       !strcmp("__intrinsic_atomic_and", callee) ||
-       !strcmp("__intrinsic_atomic_or", callee) ||
-       !strcmp("__intrinsic_atomic_xor", callee) ||
-       !strcmp("__intrinsic_atomic_exchange", callee) ||
-       !strcmp("__intrinsic_atomic_comp_swap", callee)) {
-      return lower_shared_atomic_intrinsic(ir);
-   }
-
-   return ir;
-}
-
-ir_visitor_status
-lower_shared_reference_visitor::visit_enter(ir_call *ir)
-{
-   ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
-   if (new_ir != ir) {
-      progress = true;
-      base_ir->replace_with(new_ir);
-      return visit_continue_with_parent;
-   }
-
-   return rvalue_visit(ir);
-}
-
-} /* unnamed namespace */
-
-void
-lower_shared_reference(struct gl_shader *shader, unsigned *shared_size)
-{
-   if (shader->Stage != MESA_SHADER_COMPUTE)
-      return;
-
-   lower_shared_reference_visitor v(shader);
-
-   /* Loop over the instructions lowering references, because we take a deref
-    * of an shared variable array using a shared variable dereference as the
-    * index will produce a collection of instructions all of which have cloned
-    * shared variable dereferences for that array index.
-    */
-   do {
-      v.progress = false;
-      visit_list_elements(&v, shader->ir);
-   } while (v.progress);
-
-   *shared_size = v.shared_size;
-}
--- a/src/compiler/nir/Makefile.sources
+++ b/src/compiler/nir/Makefile.sources
@@ -1,71 +0,0 @@
-NIR_GENERATED_FILES = \
-	nir_builder_opcodes.h \
-	nir_constant_expressions.c \
-	nir_opcodes.c \
-	nir_opcodes.h \
-	nir_opt_algebraic.c
-
-NIR_FILES = \
-	glsl_to_nir.cpp \
-	glsl_to_nir.h \
-	nir.c \
-	nir.h \
-	nir_array.h \
-	nir_builder.h \
-	nir_clone.c \
-	nir_constant_expressions.h \
-	nir_control_flow.c \
-	nir_control_flow.h \
-	nir_control_flow_private.h \
-	nir_dominance.c \
-	nir_from_ssa.c \
-	nir_gs_count_vertices.c \
-	nir_intrinsics.c \
-	nir_intrinsics.h \
-	nir_instr_set.c \
-	nir_instr_set.h \
-	nir_liveness.c \
-	nir_lower_alu_to_scalar.c \
-	nir_lower_atomics.c \
-	nir_lower_clip.c \
-	nir_lower_global_vars_to_local.c \
-	nir_lower_gs_intrinsics.c \
-	nir_lower_load_const_to_scalar.c \
-	nir_lower_locals_to_regs.c \
-	nir_lower_idiv.c \
-	nir_lower_io.c \
-	nir_lower_outputs_to_temporaries.c \
-	nir_lower_phis_to_scalar.c \
-	nir_lower_samplers.c \
-	nir_lower_system_values.c \
-	nir_lower_tex.c \
-	nir_lower_to_source_mods.c \
-	nir_lower_two_sided_color.c \
-	nir_lower_vars_to_ssa.c \
-	nir_lower_var_copies.c \
-	nir_lower_vec_to_movs.c \
-	nir_metadata.c \
-	nir_move_vec_src_uses_to_dest.c \
-	nir_normalize_cubemap_coords.c \
-	nir_opt_constant_folding.c \
-	nir_opt_copy_propagate.c \
-	nir_opt_cse.c \
-	nir_opt_dce.c \
-	nir_opt_dead_cf.c \
-	nir_opt_gcm.c \
-	nir_opt_global_to_local.c \
-	nir_opt_peephole_select.c \
-	nir_opt_remove_phis.c \
-	nir_opt_undef.c \
-	nir_print.c \
-	nir_remove_dead_variables.c \
-	nir_search.c \
-	nir_search.h \
-	nir_split_var_copies.c \
-	nir_sweep.c \
-	nir_to_ssa.c \
-	nir_validate.c \
-	nir_vla.h \
-	nir_worklist.c \
-	nir_worklist.h
-
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -1,317 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-/**
- * This header file defines all the available intrinsics in one place. It
- * expands to a list of macros of the form:
- *
- * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
- *              num_variables, num_indices, idx0, idx1, idx2, flags)
- *
- * Which should correspond one-to-one with the nir_intrinsic_info structure. It
- * is included in both ir.h to create the nir_intrinsic enum (with members of
- * the form nir_intrinsic_(name)) and and in opcodes.c to create
- * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures
- * for each intrinsic.
- */
-
-#define ARR(...) { __VA_ARGS__ }
-
-
-INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, WRMASK, xx, xx, 0)
-INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, xx, xx, xx, 0)
-
-/*
- * Interpolation of input.  The interp_var_at* intrinsics are similar to the
- * load_var intrinsic acting an a shader input except that they interpolate
- * the input differently.  The at_sample and at_offset intrinsics take an
- * aditional source that is a integer sample id or a vec2 position offset
- * respectively.
- */
-
-INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-
-/*
- * Ask the driver for the size of a given buffer. It takes the buffer index
- * as source.
- */
-INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-
-/*
- * a barrier is an intrinsic with no inputs/outputs but which can't be moved
- * around/optimized in general
- */
-#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, xx, xx, xx, 0)
-
-BARRIER(barrier)
-BARRIER(discard)
-
-/*
- * Memory barrier with semantics analogous to the memoryBarrier() GLSL
- * intrinsic.
- */
-BARRIER(memory_barrier)
-
-/*
- * Shader clock intrinsic with semantics analogous to the clock2x32ARB()
- * GLSL intrinsic.
- * The latter can be used as code motion barrier, which is currently not
- * feasible with NIR.
- */
-INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-
-/*
- * Memory barrier with semantics analogous to the compute shader
- * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
- * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
- */
-BARRIER(group_memory_barrier)
-BARRIER(memory_barrier_atomic_counter)
-BARRIER(memory_barrier_buffer)
-BARRIER(memory_barrier_image)
-BARRIER(memory_barrier_shared)
-
-/** A conditional discard, with a single boolean source. */
-INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
-
-/**
- * Basic Geometry Shader intrinsics.
- *
- * emit_vertex implements GLSL's EmitStreamVertex() built-in.  It takes a single
- * index, which is the stream ID to write to.
- *
- * end_primitive implements GLSL's EndPrimitive() built-in.
- */
-INTRINSIC(emit_vertex,   0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-
-/**
- * Geometry Shader intrinsics with a vertex count.
- *
- * Alternatively, drivers may implement these intrinsics, and use
- * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
- *
- * These maintain a count of the number of vertices emitted, as an additional
- * unsigned integer source.
- */
-INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, STREAM_ID, xx, xx, 0)
-INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, xx, xx, xx, 0)
-
-/*
- * Atomic counters
- *
- * The *_var variants take an atomic_uint nir_variable, while the other,
- * lowered, variants take a constant buffer index and register offset.
- */
-
-#define ATOMIC(name, flags) \
-   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, xx, xx, xx, flags) \
-   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, BASE, xx, xx, flags)
-
-ATOMIC(inc, 0)
-ATOMIC(dec, 0)
-ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
-
-/*
- * Image load, store and atomic intrinsics.
- *
- * All image intrinsics take an image target passed as a nir_variable.  Image
- * variables contain a number of memory and layout qualifiers that influence
- * the semantics of the intrinsic.
- *
- * All image intrinsics take a four-coordinate vector and a sample index as
- * first two sources, determining the location within the image that will be
- * accessed by the intrinsic.  Components not applicable to the image target
- * in use are undefined.  Image store takes an additional four-component
- * argument with the value to be written, and image atomic operations take
- * either one or two additional scalar arguments with the same meaning as in
- * the ARB_shader_image_load_store specification.
- */
-INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE)
-INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, xx, xx, xx, 0)
-INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, xx, xx, xx,
-          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-
-/*
- * SSBO atomic intrinsics
- *
- * All of the SSBO atomic memory operations read a value from memory,
- * compute a new value using one of the operations below, write the new
- * value to memory, and return the original value read.
- *
- * All operations take 3 sources except CompSwap that takes 4. These
- * sources represent:
- *
- * 0: The SSBO buffer index.
- * 1: The offset into the SSBO buffer of the variable that the atomic
- *    operation will operate on.
- * 2: The data parameter to the atomic function (i.e. the value to add
- *    in ssbo_atomic_add, etc).
- * 3: For CompSwap only: the second data parameter.
- */
-INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-
-/*
- * CS shared variable atomic intrinsics
- *
- * All of the shared variable atomic memory operations read a value from
- * memory, compute a new value using one of the operations below, write the
- * new value to memory, and return the original value read.
- *
- * All operations take 2 sources except CompSwap that takes 3. These
- * sources represent:
- *
- * 0: The offset into the shared variable storage region that the atomic
- *    operation will operate on.
- * 1: The data parameter to the atomic function (i.e. the value to add
- *    in shared_atomic_add, etc).
- * 2: For CompSwap only: the second data parameter.
- */
-INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, xx, xx, xx, 0)
-
-#define SYSTEM_VALUE(name, components, num_indices, idx0, idx1, idx2) \
-   INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
-   idx0, idx1, idx2, \
-   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-
-SYSTEM_VALUE(front_face, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(vertex_id, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(vertex_id_zero_base, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(base_vertex, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(instance_id, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(base_instance, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(draw_id, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(sample_id, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(sample_pos, 2, 0, xx, xx, xx)
-SYSTEM_VALUE(sample_mask_in, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(primitive_id, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(invocation_id, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(tess_coord, 3, 0, xx, xx, xx)
-SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)
-SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)
-SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)
-SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)
-SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
-SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
-SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
-SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
-
-/*
- * Load operations pull data from some piece of GPU memory.  All load
- * operations operate in terms of offsets into some piece of theoretical
- * memory.  Loads from externally visible memory (UBO and SSBO) simply take a
- * byte offset as a source.  Loads from opaque memory (uniforms, inputs, etc.)
- * take a base+offset pair where the base (const_index[0]) gives the location
- * of the start of the variable being loaded and and the offset source is a
- * offset into that variable.
- *
- * Some load operations such as UBO/SSBO load and per_vertex loads take an
- * additional source to specify which UBO/SSBO/vertex to load from.
- *
- * The exact address type depends on the lowering pass that generates the
- * load/store intrinsics.  Typically, this is vec4 units for things such as
- * varying slots and float units for fragment shader inputs.  UBO and SSBO
- * offsets are always in bytes.
- */
-
-#define LOAD(name, srcs, num_indices, idx0, idx1, idx2, flags) \
-   INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, num_indices, idx0, idx1, idx2, flags)
-
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(uniform, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* src[] = { buffer_index, offset }. No const_index */
-LOAD(ubo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(input, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* src[] = { vertex, offset }. const_index[] = { base } */
-LOAD(per_vertex_input, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
-/* src[] = { buffer_index, offset }. No const_index */
-LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(output, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-/* src[] = { vertex, offset }. const_index[] = { base } */
-LOAD(per_vertex_output, 2, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-/* src[] = { offset }. const_index[] = { base } */
-LOAD(shared, 1, 1, BASE, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
-
-/*
- * Stores work the same way as loads, except now the first source is the value
- * to store and the second (and possibly third) source specify where to store
- * the value.  SSBO and shared memory stores also have a write mask as
- * const_index[0].
- */
-
-#define STORE(name, srcs, num_indices, idx0, idx1, idx2, flags) \
-   INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, num_indices, idx0, idx1, idx2, flags)
-
-/* src[] = { value, offset }. const_index[] = { base, write_mask } */
-STORE(output, 2, 2, BASE, WRMASK, xx, 0)
-/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
-STORE(per_vertex_output, 3, 2, BASE, WRMASK, xx, 0)
-/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
-STORE(ssbo, 3, 1, WRMASK, xx, xx, 0)
-/* src[] = { value, offset }. const_index[] = { base, write_mask } */
-STORE(shared, 2, 2, BASE, WRMASK, xx, 0)
-
-LAST_INTRINSIC(store_shared)
--- a/src/egl/Android.mk
+++ b/src/egl/Android.mk
@@ -44,8 +44,9 @@ LOCAL_CFLAGS := \
 	-DHAVE_ANDROID_PLATFORM

 ifeq ($(MESA_LOLLIPOP_BUILD),true)
-LOCAL_CFLAGS_32 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
-LOCAL_CFLAGS_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
+LOCAL_CFLAGS_arm := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+LOCAL_CFLAGS_x86 := -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
+LOCAL_CFLAGS_x86_64 := -DDEFAULT_DRIVER_DIR=\"/system/lib64/dri\"
 else
 LOCAL_CFLAGS += -DDEFAULT_DRIVER_DIR=\"/system/lib/dri\"
 endif
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -235,8 +235,6 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,

      case __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE:
         srgb = value != 0;
-         if (!disp->Extensions.KHR_gl_colorspace && srgb)
-            return NULL;
         break;

      default:
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -532,12 +532,7 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
      { HAL_PIXEL_FORMAT_RGB_888,   { 0xff, 0xff00, 0xff0000, 0x0 } },
      { HAL_PIXEL_FORMAT_RGB_565,   { 0xf800, 0x7e0, 0x1f, 0x0 } },
      { HAL_PIXEL_FORMAT_BGRA_8888, { 0xff0000, 0xff00, 0xff, 0xff000000 } },
-      { 0, { 0, 0, 0, 0 } }
-   };
-   EGLint config_attrs[] = {
-     EGL_NATIVE_VISUAL_ID,   0,
-     EGL_NATIVE_VISUAL_TYPE, 0,
-     EGL_NONE
+      { 0, 0, { 0, 0, 0, 0 } }
   };
   int count, i, j;

@@ -545,9 +540,6 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
   for (i = 0; visuals[i].format; i++) {
      int format_count = 0;

-      config_attrs[1] = visuals[i].format;
-      config_attrs[3] = visuals[i].format;
-
      for (j = 0; dri2_dpy->driver_configs[j]; j++) {
         const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
         struct dri2_egl_config *dri2_conf;
@@ -561,8 +553,10 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
            continue;

         dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[j],
-               count + 1, surface_type, config_attrs, visuals[i].rgba_masks);
+               count + 1, surface_type, NULL, visuals[i].rgba_masks);
         if (dri2_conf) {
+            dri2_conf->base.NativeVisualID = visuals[i].format;
+            dri2_conf->base.NativeVisualType = visuals[i].format;
            count++;
            format_count++;
         }
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -472,8 +472,6 @@ dri2_x11_get_buffers(__DRIdrawable * driDrawable,
 					    dri2_surf->drawable,
 					    count, count, attachments);
   reply = xcb_dri2_get_buffers_reply (dri2_dpy->conn, cookie, NULL);
-   if (reply == NULL)
-      return NULL;
   buffers = xcb_dri2_get_buffers_buffers (reply);
   if (buffers == NULL)
      return NULL;
@@ -872,12 +870,7 @@ dri2_x11_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);

   if (dri2_dpy->dri2) {
-      if (dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1) {
-          return EGL_TRUE;
-      }
-      /* Swap failed with a window drawable. */
-      _eglError(EGL_BAD_NATIVE_WINDOW, __FUNCTION__);
-      return EGL_FALSE;
+      return dri2_x11_swap_buffers_msc(drv, disp, draw, 0, 0, 0) != -1;
   } else {
      assert(dri2_dpy->swrast);

--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -1555,14 +1555,8 @@ eglGetSyncAttrib(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *valu
 static EGLBoolean EGLAPIENTRY
 eglGetSyncAttribKHR(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLint *value)
 {
-   EGLAttrib attrib;
-   EGLBoolean result;
-
-   if (!value)
-      RETURN_EGL_ERROR(NULL, EGL_BAD_PARAMETER, EGL_FALSE);
-
-   attrib = *value;
-   result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);
+   EGLAttrib attrib = *value;
+   EGLBoolean result = eglGetSyncAttrib(dpy, sync, attribute, &attrib);

   /* The EGL_KHR_fence_sync spec says this about eglGetSyncAttribKHR:
    *
--- a/src/egl/main/eglsync.c
+++ b/src/egl/main/eglsync.c
@@ -144,6 +144,9 @@ EGLBoolean
 _eglGetSyncAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
                  EGLint attribute, EGLAttrib *value)
 {
+   if (!value)
+      return _eglError(EGL_BAD_PARAMETER, "eglGetSyncAttribKHR");
+
   switch (attribute) {
   case EGL_SYNC_TYPE_KHR:
      *value = sync->Type;
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -83,11 +83,6 @@ ifneq ($(filter vc4, $(MESA_GPU_DRIVERS)),)
 SUBDIRS += winsys/vc4/drm drivers/vc4
 endif

-# virgl
-ifneq ($(filter virgl, $(MESA_GPU_DRIVERS)),)
-SUBDIRS += winsys/virgl/drm winsys/virgl/vtest drivers/virgl
-endif
-
 # vmwgfx
 ifneq ($(filter vmwgfx, $(MESA_GPU_DRIVERS)),)
 SUBDIRS += winsys/svga/drm drivers/svga
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,37 +1,25 @@
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc

-noinst_LTLIBRARIES = libgallium_nir.la
+noinst_LTLIBRARIES = libgallium.la

 AM_CFLAGS = \
 	-I$(top_srcdir)/src/loader \
+	-I$(top_builddir)/src/glsl/nir \
 	-I$(top_srcdir)/src/gallium/auxiliary/util \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
-	$(MSVC2013_COMPAT_CFLAGS)
+	$(MSVC2008_COMPAT_CXXFLAGS)

 AM_CXXFLAGS = \
 	$(VISIBILITY_CXXFLAGS) \
-	$(MSVC2013_COMPAT_CXXFLAGS)
-
-libgallium_nir_la_SOURCES = \
-	$(NIR_SOURCES)
-
-libgallium_nir_la_CFLAGS = \
-	-I$(top_builddir)/src/compiler/nir \
-	$(GALLIUM_CFLAGS) \
-	$(VISIBILITY_CFLAGS) \
-	$(MSVC2013_COMPAT_CFLAGS)
-
-noinst_LTLIBRARIES += libgallium.la
+	$(MSVC2008_COMPAT_CXXFLAGS)

 libgallium_la_SOURCES = \
 	$(C_SOURCES) \
+	$(NIR_SOURCES) \
 	$(GENERATED_SOURCES)

-libgallium_la_LIBADD = \
-	libgallium_nir.la
-
 if HAVE_MESA_LLVM

 AM_CFLAGS += \
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -93,8 +93,6 @@ C_SOURCES := \
 	pipebuffer/pb_bufmgr_ondemand.c \
 	pipebuffer/pb_bufmgr_pool.c \
 	pipebuffer/pb_bufmgr_slab.c \
-	pipebuffer/pb_cache.c \
-	pipebuffer/pb_cache.h \
 	pipebuffer/pb_validate.c \
 	pipebuffer/pb_validate.h \
 	postprocess/filters.h \
@@ -191,13 +189,11 @@ C_SOURCES := \
 	util/u_cpu_detect.c \
 	util/u_cpu_detect.h \
 	util/u_debug.c \
-	util/u_debug.h \
 	util/u_debug_describe.c \
 	util/u_debug_describe.h \
 	util/u_debug_flush.c \
 	util/u_debug_flush.h \
-	util/u_debug_image.c \
-	util/u_debug_image.h \
+	util/u_debug.h \
 	util/u_debug_memory.c \
 	util/u_debug_refcnt.c \
 	util/u_debug_refcnt.h \
@@ -223,6 +219,8 @@ C_SOURCES := \
 	util/u_format.h \
 	util/u_format_etc.c \
 	util/u_format_etc.h \
+	util/u_format_fake.c \
+	util/u_format_fake.h \
 	util/u_format_latc.c \
 	util/u_format_latc.h \
 	util/u_format_other.c \
@@ -254,6 +252,7 @@ C_SOURCES := \
 	util/u_helpers.h \
 	util/u_index_modify.c \
 	util/u_index_modify.h \
+	util/u_init.h \
 	util/u_inlines.h \
 	util/u_keymap.c \
 	util/u_keymap.h \
@@ -273,7 +272,6 @@ C_SOURCES := \
 	util/u_prim_restart.h \
 	util/u_pstipple.c \
 	util/u_pstipple.h \
-	util/u_pwr8.h \
 	util/u_range.h \
 	util/u_rect.h \
 	util/u_resource.c \
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -10,7 +10,7 @@ env.Append(CPPPATH = [

 env = env.Clone()

-env.MSVC2013Compat()
+env.MSVC2008Compat()

 env.CodeGenerate(
    target = 'indices/u_indices_gen.c', 
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -247,7 +247,7 @@ struct cso_cache *cso_cache_create(void)
 {
   struct cso_cache *sc = MALLOC_STRUCT(cso_cache);
   int i;
-   if (!sc)
+   if (sc == NULL)
      return NULL;

   sc->max_size           = 4096;
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -69,11 +69,8 @@ struct cso_context {

   boolean has_geometry_shader;
   boolean has_tessellation;
-   boolean has_compute_shader;
   boolean has_streamout;

-   unsigned saved_state;  /**< bitmask of CSO_BIT_x flags */
-
   struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
   unsigned nr_fragment_views;

@@ -109,7 +106,6 @@ struct cso_context {
   void *geometry_shader, *geometry_shader_saved;
   void *tessctrl_shader, *tessctrl_shader_saved;
   void *tesseval_shader, *tesseval_shader_saved;
-   void *compute_shader;
   void *velements, *velements_saved;
   struct pipe_query *render_condition, *render_condition_saved;
   uint render_condition_mode, render_condition_mode_saved;
@@ -248,7 +244,7 @@ static void cso_init_vbuf(struct cso_context *cso)
 struct cso_context *cso_create_context( struct pipe_context *pipe )
 {
   struct cso_context *ctx = CALLOC_STRUCT(cso_context);
-   if (!ctx)
+   if (ctx == NULL)
      goto out;

   ctx->cache = cso_cache_create();
@@ -276,15 +272,6 @@ struct cso_context *cso_create_context( struct pipe_context *pipe )
                                PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
      ctx->has_tessellation = TRUE;
   }
-   if (pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
-                                      PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) {
-      int supported_irs =
-         pipe->screen->get_shader_param(pipe->screen, PIPE_SHADER_COMPUTE,
-                                        PIPE_SHADER_CAP_SUPPORTED_IRS);
-      if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
-         ctx->has_compute_shader = TRUE;
-      }
-   }
   if (pipe->screen->get_param(pipe->screen,
                               PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
      ctx->has_streamout = TRUE;
@@ -346,10 +333,6 @@ void cso_destroy_context( struct cso_context *ctx )
         ctx->pipe->bind_tes_state(ctx->pipe, NULL);
         ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL);
      }
-      if (ctx->has_compute_shader) {
-         ctx->pipe->bind_compute_state(ctx->pipe, NULL);
-         ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
-      }
      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );

      if (ctx->has_streamout)
@@ -442,15 +425,13 @@ enum pipe_error cso_set_blend(struct cso_context *ctx,
   return PIPE_OK;
 }

-static void
-cso_save_blend(struct cso_context *ctx)
+void cso_save_blend(struct cso_context *ctx)
 {
   assert(!ctx->blend_saved);
   ctx->blend_saved = ctx->blend;
 }

-static void
-cso_restore_blend(struct cso_context *ctx)
+void cso_restore_blend(struct cso_context *ctx)
 {
   if (ctx->blend != ctx->blend_saved) {
      ctx->blend = ctx->blend_saved;
@@ -507,15 +488,13 @@ cso_set_depth_stencil_alpha(struct cso_context *ctx,
   return PIPE_OK;
 }

-static void
-cso_save_depth_stencil_alpha(struct cso_context *ctx)
+void cso_save_depth_stencil_alpha(struct cso_context *ctx)
 {
   assert(!ctx->depth_stencil_saved);
   ctx->depth_stencil_saved = ctx->depth_stencil;
 }

-static void
-cso_restore_depth_stencil_alpha(struct cso_context *ctx)
+void cso_restore_depth_stencil_alpha(struct cso_context *ctx)
 {
   if (ctx->depth_stencil != ctx->depth_stencil_saved) {
      ctx->depth_stencil = ctx->depth_stencil_saved;
@@ -568,15 +547,13 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx,
   return PIPE_OK;
 }

-static void
-cso_save_rasterizer(struct cso_context *ctx)
+void cso_save_rasterizer(struct cso_context *ctx)
 {
   assert(!ctx->rasterizer_saved);
   ctx->rasterizer_saved = ctx->rasterizer;
 }

-static void
-cso_restore_rasterizer(struct cso_context *ctx)
+void cso_restore_rasterizer(struct cso_context *ctx)
 {
   if (ctx->rasterizer != ctx->rasterizer_saved) {
      ctx->rasterizer = ctx->rasterizer_saved;
@@ -604,15 +581,13 @@ void cso_delete_fragment_shader(struct cso_context *ctx, void *handle )
   ctx->pipe->delete_fs_state(ctx->pipe, handle);
 }

-static void
-cso_save_fragment_shader(struct cso_context *ctx)
+void cso_save_fragment_shader(struct cso_context *ctx)
 {
   assert(!ctx->fragment_shader_saved);
   ctx->fragment_shader_saved = ctx->fragment_shader;
 }

-static void
-cso_restore_fragment_shader(struct cso_context *ctx)
+void cso_restore_fragment_shader(struct cso_context *ctx)
 {
   if (ctx->fragment_shader_saved != ctx->fragment_shader) {
      ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved);
@@ -640,15 +615,13 @@ void cso_delete_vertex_shader(struct cso_context *ctx, void *handle )
   ctx->pipe->delete_vs_state(ctx->pipe, handle);
 }

-static void
-cso_save_vertex_shader(struct cso_context *ctx)
+void cso_save_vertex_shader(struct cso_context *ctx)
 {
   assert(!ctx->vertex_shader_saved);
   ctx->vertex_shader_saved = ctx->vertex_shader;
 }

-static void
-cso_restore_vertex_shader(struct cso_context *ctx)
+void cso_restore_vertex_shader(struct cso_context *ctx)
 {
   if (ctx->vertex_shader_saved != ctx->vertex_shader) {
      ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved);
@@ -667,14 +640,12 @@ void cso_set_framebuffer(struct cso_context *ctx,
   }
 }

-static void
-cso_save_framebuffer(struct cso_context *ctx)
+void cso_save_framebuffer(struct cso_context *ctx)
 {
   util_copy_framebuffer_state(&ctx->fb_saved, &ctx->fb);
 }

-static void
-cso_restore_framebuffer(struct cso_context *ctx)
+void cso_restore_framebuffer(struct cso_context *ctx)
 {
   if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) {
      util_copy_framebuffer_state(&ctx->fb, &ctx->fb_saved);
@@ -693,33 +664,13 @@ void cso_set_viewport(struct cso_context *ctx,
   }
 }

-/**
- * Setup viewport state for given width and height (position is always (0,0)).
- * Invert the Y axis if 'invert' is true.
- */
-void
-cso_set_viewport_dims(struct cso_context *ctx,
-                      float width, float height, boolean invert)
-{
-   struct pipe_viewport_state vp;
-   vp.scale[0] = width * 0.5f;
-   vp.scale[1] = height * (invert ? -0.5f : 0.5f);
-   vp.scale[2] = 0.5f;
-   vp.translate[0] = 0.5f * width;
-   vp.translate[1] = 0.5f * height;
-   vp.translate[2] = 0.5f;
-   cso_set_viewport(ctx, &vp);
-}
-
-static void
-cso_save_viewport(struct cso_context *ctx)
+void cso_save_viewport(struct cso_context *ctx)
 {
   ctx->vp_saved = ctx->vp;
 }


-static void
-cso_restore_viewport(struct cso_context *ctx)
+void cso_restore_viewport(struct cso_context *ctx)
 {
   if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) {
      ctx->vp = ctx->vp_saved;
@@ -745,14 +696,12 @@ void cso_set_sample_mask(struct cso_context *ctx, unsigned sample_mask)
   }
 }

-static void
-cso_save_sample_mask(struct cso_context *ctx)
+void cso_save_sample_mask(struct cso_context *ctx)
 {
   ctx->sample_mask_saved = ctx->sample_mask;
 }

-static void
-cso_restore_sample_mask(struct cso_context *ctx)
+void cso_restore_sample_mask(struct cso_context *ctx)
 {
   cso_set_sample_mask(ctx, ctx->sample_mask_saved);
 }
@@ -765,14 +714,12 @@ void cso_set_min_samples(struct cso_context *ctx, unsigned min_samples)
   }
 }

-static void
-cso_save_min_samples(struct cso_context *ctx)
+void cso_save_min_samples(struct cso_context *ctx)
 {
   ctx->min_samples_saved = ctx->min_samples;
 }

-static void
-cso_restore_min_samples(struct cso_context *ctx)
+void cso_restore_min_samples(struct cso_context *ctx)
 {
   cso_set_min_samples(ctx, ctx->min_samples_saved);
 }
@@ -786,15 +733,13 @@ void cso_set_stencil_ref(struct cso_context *ctx,
   }
 }

-static void
-cso_save_stencil_ref(struct cso_context *ctx)
+void cso_save_stencil_ref(struct cso_context *ctx)
 {
   ctx->stencil_ref_saved = ctx->stencil_ref;
 }


-static void
-cso_restore_stencil_ref(struct cso_context *ctx)
+void cso_restore_stencil_ref(struct cso_context *ctx)
 {
   if (memcmp(&ctx->stencil_ref, &ctx->stencil_ref_saved,
              sizeof(ctx->stencil_ref))) {
@@ -819,16 +764,14 @@ void cso_set_render_condition(struct cso_context *ctx,
   }
 }

-static void
-cso_save_render_condition(struct cso_context *ctx)
+void cso_save_render_condition(struct cso_context *ctx)
 {
   ctx->render_condition_saved = ctx->render_condition;
   ctx->render_condition_cond_saved = ctx->render_condition_cond;
   ctx->render_condition_mode_saved = ctx->render_condition_mode;
 }

-static void
-cso_restore_render_condition(struct cso_context *ctx)
+void cso_restore_render_condition(struct cso_context *ctx)
 {
   cso_set_render_condition(ctx, ctx->render_condition_saved,
                            ctx->render_condition_cond_saved,
@@ -855,8 +798,7 @@ void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
   ctx->pipe->delete_gs_state(ctx->pipe, handle);
 }

-static void
-cso_save_geometry_shader(struct cso_context *ctx)
+void cso_save_geometry_shader(struct cso_context *ctx)
 {
   if (!ctx->has_geometry_shader) {
      return;
@@ -866,8 +808,7 @@ cso_save_geometry_shader(struct cso_context *ctx)
   ctx->geometry_shader_saved = ctx->geometry_shader;
 }

-static void
-cso_restore_geometry_shader(struct cso_context *ctx)
+void cso_restore_geometry_shader(struct cso_context *ctx)
 {
   if (!ctx->has_geometry_shader) {
      return;
@@ -900,8 +841,7 @@ void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle)
   ctx->pipe->delete_tcs_state(ctx->pipe, handle);
 }

-static void
-cso_save_tessctrl_shader(struct cso_context *ctx)
+void cso_save_tessctrl_shader(struct cso_context *ctx)
 {
   if (!ctx->has_tessellation) {
      return;
@@ -911,8 +851,7 @@ cso_save_tessctrl_shader(struct cso_context *ctx)
   ctx->tessctrl_shader_saved = ctx->tessctrl_shader;
 }

-static void
-cso_restore_tessctrl_shader(struct cso_context *ctx)
+void cso_restore_tessctrl_shader(struct cso_context *ctx)
 {
   if (!ctx->has_tessellation) {
      return;
@@ -945,8 +884,7 @@ void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle)
   ctx->pipe->delete_tes_state(ctx->pipe, handle);
 }

-static void
-cso_save_tesseval_shader(struct cso_context *ctx)
+void cso_save_tesseval_shader(struct cso_context *ctx)
 {
   if (!ctx->has_tessellation) {
      return;
@@ -956,8 +894,7 @@ cso_save_tesseval_shader(struct cso_context *ctx)
   ctx->tesseval_shader_saved = ctx->tesseval_shader;
 }

-static void
-cso_restore_tesseval_shader(struct cso_context *ctx)
+void cso_restore_tesseval_shader(struct cso_context *ctx)
 {
   if (!ctx->has_tessellation) {
      return;
@@ -970,26 +907,6 @@ cso_restore_tesseval_shader(struct cso_context *ctx)
   ctx->tesseval_shader_saved = NULL;
 }

-void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle)
-{
-   assert(ctx->has_compute_shader || !handle);
-
-   if (ctx->has_compute_shader && ctx->compute_shader != handle) {
-      ctx->compute_shader = handle;
-      ctx->pipe->bind_compute_state(ctx->pipe, handle);
-   }
-}
-
-void cso_delete_compute_shader(struct cso_context *ctx, void *handle)
-{
-    if (handle == ctx->compute_shader) {
-      /* unbind before deleting */
-      ctx->pipe->bind_compute_state(ctx->pipe, NULL);
-      ctx->compute_shader = NULL;
-   }
-   ctx->pipe->delete_compute_state(ctx->pipe, handle);
-}
-
 enum pipe_error
 cso_set_vertex_elements(struct cso_context *ctx,
                        unsigned count,
@@ -1050,8 +967,7 @@ cso_set_vertex_elements(struct cso_context *ctx,
   return PIPE_OK;
 }

-static void
-cso_save_vertex_elements(struct cso_context *ctx)
+void cso_save_vertex_elements(struct cso_context *ctx)
 {
   struct u_vbuf *vbuf = ctx->vbuf;

@@ -1064,8 +980,7 @@ cso_save_vertex_elements(struct cso_context *ctx)
   ctx->velements_saved = ctx->velements;
 }

-static void
-cso_restore_vertex_elements(struct cso_context *ctx)
+void cso_restore_vertex_elements(struct cso_context *ctx)
 {
   struct u_vbuf *vbuf = ctx->vbuf;

@@ -1117,8 +1032,7 @@ void cso_set_vertex_buffers(struct cso_context *ctx,
   ctx->pipe->set_vertex_buffers(ctx->pipe, start_slot, count, buffers);
 }

-static void
-cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
+void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
 {
   struct u_vbuf *vbuf = ctx->vbuf;

@@ -1133,8 +1047,7 @@ cso_save_aux_vertex_buffer_slot(struct cso_context *ctx)
          sizeof(struct pipe_vertex_buffer));
 }

-static void
-cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
+void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx)
 {
   struct u_vbuf *vbuf = ctx->vbuf;

@@ -1162,7 +1075,7 @@ cso_single_sampler(struct cso_context *ctx, unsigned shader_stage,
 {
   void *handle = NULL;

-   if (templ) {
+   if (templ != NULL) {
      unsigned key_size = sizeof(struct pipe_sampler_state);
      unsigned hash_key = cso_construct_key((void*)templ, key_size);
      struct cso_hash_iter iter =
@@ -1252,7 +1165,7 @@ cso_set_samplers(struct cso_context *ctx,
   return error;
 }

-static void
+void
 cso_save_fragment_samplers(struct cso_context *ctx)
 {
   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
@@ -1263,7 +1176,7 @@ cso_save_fragment_samplers(struct cso_context *ctx)
 }


-static void
+void
 cso_restore_fragment_samplers(struct cso_context *ctx)
 {
   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
@@ -1310,7 +1223,7 @@ cso_set_sampler_views(struct cso_context *ctx,
 }


-static void
+void
 cso_save_fragment_sampler_views(struct cso_context *ctx)
 {
   unsigned i;
@@ -1325,7 +1238,7 @@ cso_save_fragment_sampler_views(struct cso_context *ctx)
 }


-static void
+void
 cso_restore_fragment_sampler_views(struct cso_context *ctx)
 {
   unsigned i, nr_saved = ctx->nr_fragment_views_saved;
@@ -1385,7 +1298,7 @@ cso_set_stream_outputs(struct cso_context *ctx,
   ctx->nr_so_targets = num_targets;
 }

-static void
+void
 cso_save_stream_outputs(struct cso_context *ctx)
 {
   uint i;
@@ -1402,7 +1315,7 @@ cso_save_stream_outputs(struct cso_context *ctx)
   }
 }

-static void
+void
 cso_restore_stream_outputs(struct cso_context *ctx)
 {
   struct pipe_context *pipe = ctx->pipe;
@@ -1489,113 +1402,6 @@ cso_restore_constant_buffer_slot0(struct cso_context *cso,
                           NULL);
 }

-
-/**
- * Save all the CSO state items specified by the state_mask bitmask
- * of CSO_BIT_x flags.
- */
-void
-cso_save_state(struct cso_context *cso, unsigned state_mask)
-{
-   assert(cso->saved_state == 0);
-
-   cso->saved_state = state_mask;
-
-   if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
-      cso_save_aux_vertex_buffer_slot(cso);
-   if (state_mask & CSO_BIT_BLEND)
-      cso_save_blend(cso);
-   if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
-      cso_save_depth_stencil_alpha(cso);
-   if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
-      cso_save_fragment_samplers(cso);
-   if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
-      cso_save_fragment_sampler_views(cso);
-   if (state_mask & CSO_BIT_FRAGMENT_SHADER)
-      cso_save_fragment_shader(cso);
-   if (state_mask & CSO_BIT_FRAMEBUFFER)
-      cso_save_framebuffer(cso);
-   if (state_mask & CSO_BIT_GEOMETRY_SHADER)
-      cso_save_geometry_shader(cso);
-   if (state_mask & CSO_BIT_MIN_SAMPLES)
-      cso_save_min_samples(cso);
-   if (state_mask & CSO_BIT_RASTERIZER)
-      cso_save_rasterizer(cso);
-   if (state_mask & CSO_BIT_RENDER_CONDITION)
-      cso_save_render_condition(cso);
-   if (state_mask & CSO_BIT_SAMPLE_MASK)
-      cso_save_sample_mask(cso);
-   if (state_mask & CSO_BIT_STENCIL_REF)
-      cso_save_stencil_ref(cso);
-   if (state_mask & CSO_BIT_STREAM_OUTPUTS)
-      cso_save_stream_outputs(cso);
-   if (state_mask & CSO_BIT_TESSCTRL_SHADER)
-      cso_save_tessctrl_shader(cso);
-   if (state_mask & CSO_BIT_TESSEVAL_SHADER)
-      cso_save_tesseval_shader(cso);
-   if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
-      cso_save_vertex_elements(cso);
-   if (state_mask & CSO_BIT_VERTEX_SHADER)
-      cso_save_vertex_shader(cso);
-   if (state_mask & CSO_BIT_VIEWPORT)
-      cso_save_viewport(cso);
-}
-
-
-/**
- * Restore the state which was saved by cso_save_state().
- */
-void
-cso_restore_state(struct cso_context *cso)
-{
-   unsigned state_mask = cso->saved_state;
-
-   assert(state_mask);
-
-   if (state_mask & CSO_BIT_AUX_VERTEX_BUFFER_SLOT)
-      cso_restore_aux_vertex_buffer_slot(cso);
-   if (state_mask & CSO_BIT_BLEND)
-      cso_restore_blend(cso);
-   if (state_mask & CSO_BIT_DEPTH_STENCIL_ALPHA)
-      cso_restore_depth_stencil_alpha(cso);
-   if (state_mask & CSO_BIT_FRAGMENT_SAMPLERS)
-      cso_restore_fragment_samplers(cso);
-   if (state_mask & CSO_BIT_FRAGMENT_SAMPLER_VIEWS)
-      cso_restore_fragment_sampler_views(cso);
-   if (state_mask & CSO_BIT_FRAGMENT_SHADER)
-      cso_restore_fragment_shader(cso);
-   if (state_mask & CSO_BIT_FRAMEBUFFER)
-      cso_restore_framebuffer(cso);
-   if (state_mask & CSO_BIT_GEOMETRY_SHADER)
-      cso_restore_geometry_shader(cso);
-   if (state_mask & CSO_BIT_MIN_SAMPLES)
-      cso_restore_min_samples(cso);
-   if (state_mask & CSO_BIT_RASTERIZER)
-      cso_restore_rasterizer(cso);
-   if (state_mask & CSO_BIT_RENDER_CONDITION)
-      cso_restore_render_condition(cso);
-   if (state_mask & CSO_BIT_SAMPLE_MASK)
-      cso_restore_sample_mask(cso);
-   if (state_mask & CSO_BIT_STENCIL_REF)
-      cso_restore_stencil_ref(cso);
-   if (state_mask & CSO_BIT_STREAM_OUTPUTS)
-      cso_restore_stream_outputs(cso);
-   if (state_mask & CSO_BIT_TESSCTRL_SHADER)
-      cso_restore_tessctrl_shader(cso);
-   if (state_mask & CSO_BIT_TESSEVAL_SHADER)
-      cso_restore_tesseval_shader(cso);
-   if (state_mask & CSO_BIT_VERTEX_ELEMENTS)
-      cso_restore_vertex_elements(cso);
-   if (state_mask & CSO_BIT_VERTEX_SHADER)
-      cso_restore_vertex_shader(cso);
-   if (state_mask & CSO_BIT_VIEWPORT)
-      cso_restore_viewport(cso);
-
-   cso->saved_state = 0;
-}
-
-
-
 /* drawing */

 void
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -47,15 +47,22 @@ void cso_destroy_context( struct cso_context *cso );

 enum pipe_error cso_set_blend( struct cso_context *cso,
                               const struct pipe_blend_state *blend );
+void cso_save_blend(struct cso_context *cso);
+void cso_restore_blend(struct cso_context *cso);
+


 enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso,
                                             const struct pipe_depth_stencil_alpha_state *dsa );
+void cso_save_depth_stencil_alpha(struct cso_context *cso);
+void cso_restore_depth_stencil_alpha(struct cso_context *cso);



 enum pipe_error cso_set_rasterizer( struct cso_context *cso,
                                    const struct pipe_rasterizer_state *rasterizer );
+void cso_save_rasterizer(struct cso_context *cso);
+void cso_restore_rasterizer(struct cso_context *cso);


 enum pipe_error
@@ -64,6 +71,11 @@ cso_set_samplers(struct cso_context *cso,
                 unsigned count,
                 const struct pipe_sampler_state **states);

+void
+cso_save_fragment_samplers(struct cso_context *cso);
+
+void
+cso_restore_fragment_samplers(struct cso_context *cso);

 /* Alternate interface to support state trackers that like to modify
 * samplers one at a time:
@@ -79,6 +91,9 @@ cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
 enum pipe_error cso_set_vertex_elements(struct cso_context *ctx,
                                        unsigned count,
                                        const struct pipe_vertex_element *states);
+void cso_save_vertex_elements(struct cso_context *ctx);
+void cso_restore_vertex_elements(struct cso_context *ctx);
+

 void cso_set_vertex_buffers(struct cso_context *ctx,
                            unsigned start_slot, unsigned count,
@@ -86,6 +101,8 @@ void cso_set_vertex_buffers(struct cso_context *ctx,

 /* One vertex buffer slot is provided with the save/restore functionality.
 * cso_context chooses the slot, it can be non-zero. */
+void cso_save_aux_vertex_buffer_slot(struct cso_context *ctx);
+void cso_restore_aux_vertex_buffer_slot(struct cso_context *ctx);
 unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx);


@@ -93,6 +110,8 @@ void cso_set_stream_outputs(struct cso_context *ctx,
                            unsigned num_targets,
                            struct pipe_stream_output_target **targets,
                            const unsigned *offsets);
+void cso_save_stream_outputs(struct cso_context *ctx);
+void cso_restore_stream_outputs(struct cso_context *ctx);


 /*
@@ -104,81 +123,67 @@ void cso_set_stream_outputs(struct cso_context *ctx,

 void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle);
 void cso_delete_fragment_shader(struct cso_context *ctx, void *handle );
+void cso_save_fragment_shader(struct cso_context *cso);
+void cso_restore_fragment_shader(struct cso_context *cso);


 void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle);
 void cso_delete_vertex_shader(struct cso_context *ctx, void *handle );
+void cso_save_vertex_shader(struct cso_context *cso);
+void cso_restore_vertex_shader(struct cso_context *cso);


 void cso_set_geometry_shader_handle(struct cso_context *ctx, void *handle);
 void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
+void cso_save_geometry_shader(struct cso_context *cso);
+void cso_restore_geometry_shader(struct cso_context *cso);


 void cso_set_tessctrl_shader_handle(struct cso_context *ctx, void *handle);
 void cso_delete_tessctrl_shader(struct cso_context *ctx, void *handle);
+void cso_save_tessctrl_shader(struct cso_context *cso);
+void cso_restore_tessctrl_shader(struct cso_context *cso);


 void cso_set_tesseval_shader_handle(struct cso_context *ctx, void *handle);
 void cso_delete_tesseval_shader(struct cso_context *ctx, void *handle);
-
-
-void cso_set_compute_shader_handle(struct cso_context *ctx, void *handle);
-void cso_delete_compute_shader(struct cso_context *ctx, void *handle);
+void cso_save_tesseval_shader(struct cso_context *cso);
+void cso_restore_tesseval_shader(struct cso_context *cso);


 void cso_set_framebuffer(struct cso_context *cso,
                         const struct pipe_framebuffer_state *fb);
+void cso_save_framebuffer(struct cso_context *cso);
+void cso_restore_framebuffer(struct cso_context *cso);


 void cso_set_viewport(struct cso_context *cso,
                      const struct pipe_viewport_state *vp);
-void cso_set_viewport_dims(struct cso_context *ctx,
-                           float width, float height, boolean invert);
+void cso_save_viewport(struct cso_context *cso);
+void cso_restore_viewport(struct cso_context *cso);


 void cso_set_blend_color(struct cso_context *cso,
                         const struct pipe_blend_color *bc);

 void cso_set_sample_mask(struct cso_context *cso, unsigned stencil_mask);
+void cso_save_sample_mask(struct cso_context *ctx);
+void cso_restore_sample_mask(struct cso_context *ctx);

 void cso_set_min_samples(struct cso_context *cso, unsigned min_samples);
+void cso_save_min_samples(struct cso_context *ctx);
+void cso_restore_min_samples(struct cso_context *ctx);

 void cso_set_stencil_ref(struct cso_context *cso,
                         const struct pipe_stencil_ref *sr);
+void cso_save_stencil_ref(struct cso_context *cso);
+void cso_restore_stencil_ref(struct cso_context *cso);

 void cso_set_render_condition(struct cso_context *cso,
                              struct pipe_query *query,
                              boolean condition, uint mode);
-
-
-#define CSO_BIT_AUX_VERTEX_BUFFER_SLOT    0x1
-#define CSO_BIT_BLEND                     0x2
-#define CSO_BIT_DEPTH_STENCIL_ALPHA       0x4
-#define CSO_BIT_FRAGMENT_SAMPLERS         0x8
-#define CSO_BIT_FRAGMENT_SAMPLER_VIEWS   0x10
-#define CSO_BIT_FRAGMENT_SHADER          0x20
-#define CSO_BIT_FRAMEBUFFER              0x40
-#define CSO_BIT_GEOMETRY_SHADER          0x80
-#define CSO_BIT_MIN_SAMPLES             0x100
-#define CSO_BIT_RASTERIZER              0x200
-#define CSO_BIT_RENDER_CONDITION        0x400
-#define CSO_BIT_SAMPLE_MASK             0x800
-#define CSO_BIT_STENCIL_REF            0x1000
-#define CSO_BIT_STREAM_OUTPUTS         0x2000
-#define CSO_BIT_TESSCTRL_SHADER        0x4000
-#define CSO_BIT_TESSEVAL_SHADER        0x8000
-#define CSO_BIT_VERTEX_ELEMENTS       0x10000
-#define CSO_BIT_VERTEX_SHADER         0x20000
-#define CSO_BIT_VIEWPORT              0x40000
-
-#define CSO_BITS_ALL_SHADERS (CSO_BIT_VERTEX_SHADER | \
-                              CSO_BIT_FRAGMENT_SHADER | \
-                              CSO_BIT_GEOMETRY_SHADER | \
-                              CSO_BIT_TESSCTRL_SHADER | \
-                              CSO_BIT_TESSEVAL_SHADER)
-
-void cso_save_state(struct cso_context *cso, unsigned state_mask);
-void cso_restore_state(struct cso_context *cso);
+void cso_save_render_condition(struct cso_context *cso);
+void cso_restore_render_condition(struct cso_context *cso);


 /* sampler view state */
@@ -189,6 +194,12 @@ cso_set_sampler_views(struct cso_context *cso,
                      unsigned count,
                      struct pipe_sampler_view **views);

+void
+cso_save_fragment_sampler_views(struct cso_context *ctx);
+
+void
+cso_restore_fragment_sampler_views(struct cso_context *ctx);
+

 /* constant buffers */

@@ -219,6 +230,7 @@ cso_draw_arrays_instanced(struct cso_context *cso, uint mode,
                          uint start, uint count,
                          uint start_instance, uint instance_count);

+/* helper drawing function */
 void
 cso_draw_arrays(struct cso_context *cso, uint mode, uint start, uint count);

--- a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -91,34 +91,34 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
         }

         for (i = 0; i < 4; i++) {
-            out->clip_pos[i] = position[i];
+            out->clip[i] = clipvertex[i];
+            out->pre_clip_pos[i] = position[i];
         }

-         /* Be careful with NaNs. Comparisons must be true for them. */
         /* Do the hardwired planes first:
          */
         if (flags & DO_CLIP_XY_GUARD_BAND) {
-            if (!(-0.50 * position[0] + position[3] >= 0)) mask |= (1<<0);
-            if (!( 0.50 * position[0] + position[3] >= 0)) mask |= (1<<1);
-            if (!(-0.50 * position[1] + position[3] >= 0)) mask |= (1<<2);
-            if (!( 0.50 * position[1] + position[3] >= 0)) mask |= (1<<3);
+            if (-0.50 * position[0] + position[3] < 0) mask |= (1<<0);
+            if ( 0.50 * position[0] + position[3] < 0) mask |= (1<<1);
+            if (-0.50 * position[1] + position[3] < 0) mask |= (1<<2);
+            if ( 0.50 * position[1] + position[3] < 0) mask |= (1<<3);
         }
         else if (flags & DO_CLIP_XY) {
-            if (!(-position[0] + position[3] >= 0)) mask |= (1<<0);
-            if (!( position[0] + position[3] >= 0)) mask |= (1<<1);
-            if (!(-position[1] + position[3] >= 0)) mask |= (1<<2);
-            if (!( position[1] + position[3] >= 0)) mask |= (1<<3);
+            if (-position[0] + position[3] < 0) mask |= (1<<0);
+            if ( position[0] + position[3] < 0) mask |= (1<<1);
+            if (-position[1] + position[3] < 0) mask |= (1<<2);
+            if ( position[1] + position[3] < 0) mask |= (1<<3);
         }

         /* Clip Z planes according to full cube, half cube or none.
          */
         if (flags & DO_CLIP_FULL_Z) {
-            if (!( position[2] + position[3] >= 0)) mask |= (1<<4);
-            if (!(-position[2] + position[3] >= 0)) mask |= (1<<5);
+            if ( position[2] + position[3] < 0) mask |= (1<<4);
+            if (-position[2] + position[3] < 0) mask |= (1<<5);
         }
         else if (flags & DO_CLIP_HALF_Z) {
-            if (!( position[2]               >= 0)) mask |= (1<<4);
-            if (!(-position[2] + position[3] >= 0)) mask |= (1<<5);
+            if ( position[2]               < 0) mask |= (1<<4);
+            if (-position[2] + position[3] < 0) mask |= (1<<5);
         }

         if (flags & DO_CLIP_USER) {
@@ -137,6 +137,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
               if (have_cd && num_written_clipdistance) {
                  float clipdist;
                  i = plane_idx - 6;
+                  out->have_clipdist = 1;
                  /* first four clip distance in first vector etc. */
                  if (i < 4)
                     clipdist = out->data[cd[0]][i];
@@ -145,7 +146,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
                  if (clipdist < 0 || util_is_inf_or_nan(clipdist))
                     mask |= 1 << plane_idx;
               } else {
-                  if (!(dot4(clipvertex, plane[plane_idx]) >= 0))
+                  if (dot4(clipvertex, plane[plane_idx]) < 0)
                     mask |= 1 << plane_idx;
               }
            }
@@ -191,6 +192,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,

      out = (struct vertex_header *)( (char *)out + info->stride );
   }
+
   return need_pipeline != 0;
 }

--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -72,7 +72,7 @@ draw_create_context(struct pipe_context *pipe, void *context,
                    boolean try_llvm)
 {
   struct draw_context *draw = CALLOC_STRUCT( draw_context );
-   if (!draw)
+   if (draw == NULL)
      goto err_out;

   /* we need correct cpu caps for disabling denorms in draw_vbo() */
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -734,7 +734,7 @@ draw_create_geometry_shader(struct draw_context *draw,
   if (use_llvm) {
      llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);

-      if (!llvm_gs)
+      if (llvm_gs == NULL)
         return NULL;

      gs = &llvm_gs->base;
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -188,7 +188,6 @@ create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
   sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                          Elements(elem_types), 0);

-   (void) target; /* silence unused var warning for non-debug build */
   LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
                          target, sampler_type,
                          DRAW_JIT_SAMPLER_MIN_LOD);
@@ -235,8 +234,6 @@ create_jit_context_type(struct gallivm_state *gallivm,
                                 PIPE_MAX_SAMPLERS); /* samplers */
   context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
                                          Elements(elem_types), 0);
-
-   (void) target; /* silence unused var warning for non-debug build */
   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
                          target, context_type, DRAW_JIT_CTX_CONSTANTS);
   LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
@@ -378,14 +375,15 @@ static LLVMTypeRef
 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
 {
   LLVMTargetDataRef target = gallivm->target;
-   LLVMTypeRef elem_types[3];
+   LLVMTypeRef elem_types[4];
   LLVMTypeRef vertex_header;
   char struct_name[24];

   util_snprintf(struct_name, 23, "vertex_header%d", data_elems);

   elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
-   elem_types[DRAW_JIT_VERTEX_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
+   elem_types[DRAW_JIT_VERTEX_CLIP]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
+   elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
   elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);

   vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
@@ -405,10 +403,12 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
      target, vertex_header,
      DRAW_JIT_VERTEX_VERTEX_ID);
   */
-   (void) target; /* silence unused var warning for non-debug build */
-   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
+   LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
                          target, vertex_header,
-                          DRAW_JIT_VERTEX_CLIP_POS);
+                          DRAW_JIT_VERTEX_CLIP);
+   LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
+                          target, vertex_header,
+                          DRAW_JIT_VERTEX_PRE_CLIP_POS);
   LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
                          target, vertex_header,
                          DRAW_JIT_VERTEX_DATA);
@@ -551,7 +551,7 @@ draw_llvm_create_variant(struct draw_llvm *llvm,
   variant = MALLOC(sizeof *variant +
                    shader->variant_key_size -
                    sizeof variant->key);
-   if (!variant)
+   if (variant == NULL)
      return NULL;

   variant->llvm = llvm;
@@ -826,7 +826,7 @@ store_aos(struct gallivm_state *gallivm,
 * struct vertex_header {
 *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
 *    unsigned edgeflag:1;
- *    unsigned pad:1;
+ *    unsigned have_clipdist:1;
 *    unsigned vertex_id:16;
 *    [...]
 * }
@@ -838,7 +838,7 @@ store_aos(struct gallivm_state *gallivm,
 * {
 *   return (x >> 16) |              // vertex_id
 *          ((x & 0x3fff) << 18) |   // clipmask
- *          ((x & 0x4000) << 3) |    // pad
+ *          ((x & 0x4000) << 3) |    // have_clipdist
 *          ((x & 0x8000) << 1);     // edgeflag
 * }
 */
@@ -850,23 +850,19 @@ adjust_mask(struct gallivm_state *gallivm,
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef vertex_id;
   LLVMValueRef clipmask;
-   LLVMValueRef pad;
+   LLVMValueRef have_clipdist;
   LLVMValueRef edgeflag;

   vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
   clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
   clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
-   if (0) {
-      pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
-      pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 3), "");
-   }
+   have_clipdist = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
+   have_clipdist = LLVMBuildShl(builder, have_clipdist, lp_build_const_int32(gallivm, 3), "");
   edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
   edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");

   mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
-   if (0) {
-      mask = LLVMBuildOr(builder, mask, pad, "");
-   }
+   mask = LLVMBuildOr(builder, mask, have_clipdist, "");
   mask = LLVMBuildOr(builder, mask, edgeflag, "");
 #endif
   return mask;
@@ -881,7 +877,7 @@ store_aos_array(struct gallivm_state *gallivm,
                int attrib,
                int num_outputs,
                LLVMValueRef clipmask,
-                boolean need_edgeflag)
+                boolean have_clipdist)
 {
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
@@ -912,15 +908,11 @@ store_aos_array(struct gallivm_state *gallivm,
       * code here.  See struct vertex_header in draw_private.h.
       */
      assert(DRAW_TOTAL_CLIP_PLANES==14);
-      /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
-      if (!need_edgeflag) {
-         vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
-      }
-      else {
-         vertex_id_pad_edgeflag = (0xffff << 16);
-      }
-      val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
-                                   vertex_id_pad_edgeflag);
+      /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
+      vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
+      if (have_clipdist)
+         vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
+      val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag);
      /* OR with the clipmask */
      cliptmp = LLVMBuildOr(builder, val, clipmask, "");
      for (i = 0; i < vector_length; i++) {
@@ -950,7 +942,7 @@ convert_to_aos(struct gallivm_state *gallivm,
               LLVMValueRef clipmask,
               int num_outputs,
               struct lp_type soa_type,
-               boolean need_edgeflag)
+               boolean have_clipdist)
 {
   LLVMBuilderRef builder = gallivm->builder;
   unsigned chan, attrib, i;
@@ -1006,8 +998,7 @@ convert_to_aos(struct gallivm_state *gallivm,
                      aos,
                      attrib,
                      num_outputs,
-                      clipmask,
-                      need_edgeflag);
+                      clipmask, have_clipdist);
   }
 #if DEBUG_STORE
   lp_build_printf(gallivm, "   # storing end\n");
@@ -1023,7 +1014,7 @@ store_clip(struct gallivm_state *gallivm,
           const struct lp_type vs_type,
           LLVMValueRef io_ptr,
           LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
-           int idx)
+           boolean pre_clip_pos, int idx)
 {
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef soa[4];
@@ -1050,8 +1041,14 @@ store_clip(struct gallivm_state *gallivm,
   soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
   soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/

-   for (i = 0; i < vs_type.length; i++) {
-      clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
+   if (!pre_clip_pos) {
+      for (i = 0; i < vs_type.length; i++) {
+         clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]);
+      }
+   } else {
+      for (i = 0; i < vs_type.length; i++) {
+         clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]);
+      }
   }

   lp_build_transpose_aos(gallivm, vs_type, soa, soa);
@@ -1143,7 +1140,11 @@ generate_clipmask(struct draw_llvm *llvm,
                  struct gallivm_state *gallivm,
                  struct lp_type vs_type,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
-                  struct draw_llvm_variant_key *key,
+                  boolean clip_xy,
+                  boolean clip_z,
+                  boolean clip_user,
+                  boolean clip_halfz,
+                  unsigned ucp_enable,
                  LLVMValueRef context_ptr,
                  boolean *have_clipdist)
 {
@@ -1159,9 +1160,7 @@ generate_clipmask(struct draw_llvm *llvm,
   const unsigned pos = llvm->draw->vs.position_output;
   const unsigned cv = llvm->draw->vs.clipvertex_output;
   int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
-   boolean have_cd = false;
-   boolean clip_user = key->clip_user;
-   unsigned ucp_enable = key->ucp_enable;
+   bool have_cd = false;
   unsigned cd[2];

   cd[0] = llvm->draw->vs.clipdistance_output[0];
@@ -1201,16 +1200,8 @@ generate_clipmask(struct draw_llvm *llvm,
      cv_w = pos_w;
   }

-   /*
-    * Be careful with the comparisons and NaNs (using llvm's unordered
-    * comparisons here).
-    */
   /* Cliptest, for hardwired planes */
-   /*
-    * XXX should take guardband into account (currently not in key).
-    * Otherwise might run the draw pipeline stages for nothing.
-    */
-   if (key->clip_xy) {
+   if (clip_xy) {
      /* plane 1 */
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
      temp = shift;
@@ -1238,9 +1229,9 @@ generate_clipmask(struct draw_llvm *llvm,
      mask = LLVMBuildOr(builder, mask, test, "");
   }

-   if (key->clip_z) {
+   if (clip_z) {
      temp = lp_build_const_int_vec(gallivm, i32_type, 16);
-      if (key->clip_halfz) {
+      if (clip_halfz) {
         /* plane 5 */
         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
         test = LLVMBuildAnd(builder, test, temp, "");
@@ -1327,20 +1318,6 @@ generate_clipmask(struct draw_llvm *llvm,
         }
      }
   }
-   if (key->need_edgeflags) {
-      /*
-       * This isn't really part of clipmask but stored the same in vertex
-       * header later, so do it here.
-       */
-      unsigned edge_attr = llvm->draw->vs.edgeflag_output;
-      LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
-      LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
-      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
-      temp = lp_build_const_int_vec(gallivm, i32_type,
-                                    1LL << DRAW_TOTAL_CLIP_PLANES);
-      test = LLVMBuildAnd(builder, test, temp, "");
-      mask = LLVMBuildOr(builder, mask, test, "");
-   }
   return mask;
 }

@@ -1352,8 +1329,7 @@ generate_clipmask(struct draw_llvm *llvm,
 static LLVMValueRef
 clipmask_booli32(struct gallivm_state *gallivm,
                 const struct lp_type vs_type,
-                 LLVMValueRef clipmask_bool_ptr,
-                 boolean edgeflag_in_clipmask)
+                 LLVMValueRef clipmask_bool_ptr)
 {
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
@@ -1363,18 +1339,8 @@ clipmask_booli32(struct gallivm_state *gallivm,
   int i;

   /*
-    * We need to invert the edgeflag bit from the clipmask here
-    * (because the result is really if we want to run the pipeline or not
-    * and we (may) need it if edgeflag was 0).
-    */
-   if (edgeflag_in_clipmask) {
-      struct lp_type i32_type = lp_int_type(vs_type);
-      LLVMValueRef edge = lp_build_const_int_vec(gallivm, i32_type,
-                                                 1LL << DRAW_TOTAL_CLIP_PLANES);
-      clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
-   }
-   /*
-    * Could do much better with just cmp/movmskps.
+    * Can do this with log2(vector length) pack instructions and one extract
+    * (as we don't actually need a or) with sse2 which would be way better.
    */
   for (i=0; i < vs_type.length; i++) {
      temp = LLVMBuildExtractElement(builder, clipmask_bool,
@@ -1570,9 +1536,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
   const boolean bypass_viewport = key->has_gs || key->bypass_viewport ||
                                   llvm->draw->vs.vertex_shader->info.writes_viewport_index;
   const boolean enable_cliptest = !key->has_gs && (key->clip_xy ||
-                                                    key->clip_z ||
-                                                    key->clip_user ||
-                                                    key->need_edgeflags);
+                                                    key->clip_z  ||
+                                                    key->clip_user);
   LLVMValueRef variant_func;
   const unsigned pos = llvm->draw->vs.position_output;
   const unsigned cv = llvm->draw->vs.clipvertex_output;
@@ -1618,12 +1583,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
   context_ptr               = LLVMGetParam(variant_func, 0);
   io_ptr                    = LLVMGetParam(variant_func, 1);
   vbuffers_ptr              = LLVMGetParam(variant_func, 2);
-   /*
-    * XXX: stride is actually unused. The stride we use is strictly calculated
-    * from the number of outputs (including the draw_extra outputs).
-    * Should probably fix some day (we need a new vs just because of extra
-    * outputs which the generated vs won't touch).
-    */
   stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
   vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
   system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
@@ -1807,7 +1766,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,

      if (pos != -1 && cv != -1) {
         /* store original positions in clip before further manipulation */
-         store_clip(gallivm, vs_type, io, outputs, pos);
+         store_clip(gallivm, vs_type, io, outputs, FALSE, key->clip_user ? cv : pos);
+         store_clip(gallivm, vs_type, io, outputs, TRUE, pos);

         /* do cliptest */
         if (enable_cliptest) {
@@ -1817,7 +1777,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                                         gallivm,
                                         vs_type,
                                         outputs,
-                                         key,
+                                         key->clip_xy,
+                                         key->clip_z,
+                                         key->clip_user,
+                                         key->clip_halfz,
+                                         key->ucp_enable,
                                         context_ptr, &have_clipdist);
            temp = LLVMBuildOr(builder, clipmask, temp, "");
            /* store temporary clipping boolean value */
@@ -1842,15 +1806,14 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
       */
      convert_to_aos(gallivm, io, NULL, outputs, clipmask,
                     vs_info->num_outputs, vs_type,
-                     enable_cliptest && key->need_edgeflags);
+                     have_clipdist);
   }
   lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);

   sampler->destroy(sampler);

   /* return clipping boolean value for function */
-   ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr,
-                          enable_cliptest && key->need_edgeflags);
+   ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr);

   LLVMBuildRet(builder, ret);

@@ -1867,8 +1830,6 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)

   key = (struct draw_llvm_variant_key *)store;

-   memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0]));
-
   key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/

   /* Presumably all variants of the shader should have the same
@@ -1886,11 +1847,11 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
   key->clip_user = llvm->draw->clip_user;
   key->bypass_viewport = llvm->draw->bypass_viewport;
   key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
-   /* XXX assumes edgeflag output not at 0 */
   key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
   key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
   key->has_gs = llvm->draw->gs.geometry_shader != NULL;
   key->num_outputs = draw_total_vs_outputs(llvm->draw);
+   key->pad1 = 0;

   /* All variants of this shader will have the same value for
    * nr_samplers.  Not yet trying to compact away holes in the
@@ -2263,7 +2224,7 @@ draw_gs_llvm_create_variant(struct draw_llvm *llvm,
   variant = MALLOC(sizeof *variant +
                    shader->variant_key_size -
                    sizeof variant->key);
-   if (!variant)
+   if (variant == NULL)
      return NULL;

   variant->llvm = llvm;
@@ -2322,8 +2283,6 @@ draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)

   key = (struct draw_gs_llvm_variant_key *)store;

-   memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0]));
-
   key->num_outputs = draw_total_gs_outputs(llvm->draw);

   /* All variants of this shader will have the same value for
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -104,7 +104,8 @@ enum {

 enum {
   DRAW_JIT_VERTEX_VERTEX_ID = 0,
-   DRAW_JIT_VERTEX_CLIP_POS,
+   DRAW_JIT_VERTEX_CLIP,
+   DRAW_JIT_VERTEX_PRE_CLIP_POS,
   DRAW_JIT_VERTEX_DATA
 };

@@ -161,8 +162,11 @@ enum {
 #define draw_jit_header_id(_gallivm, _ptr)              \
   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_VERTEX_ID, "id")

-#define draw_jit_header_clip_pos(_gallivm, _ptr) \
-   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_CLIP_POS, "clip_pos")
+#define draw_jit_header_clip(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_CLIP, "clip")
+
+#define draw_jit_header_pre_clip_pos(_gallivm, _ptr) \
+   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_PRE_CLIP_POS, "pre_clip_pos")

 #define draw_jit_header_data(_gallivm, _ptr)            \
   lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_DATA, "data")
@@ -311,8 +315,12 @@ struct draw_llvm_variant_key
   unsigned need_edgeflags:1;
   unsigned has_gs:1;
   unsigned num_outputs:8;
+   /*
+    * it is important there are no holes in this struct
+    * (and all padding gets zeroed).
+    */
   unsigned ucp_enable:PIPE_MAX_CLIP_PLANES;
-   /* note padding here - must use memset */
+   unsigned pad1:24-PIPE_MAX_CLIP_PLANES;

   /* Variable number of vertex elements:
    */
@@ -328,7 +336,6 @@ struct draw_gs_llvm_variant_key
   unsigned nr_samplers:8;
   unsigned nr_sampler_views:8;
   unsigned num_outputs:8;
-   /* note padding here - must use memset */

   struct draw_sampler_static_state samplers[1];
 };
--- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -283,7 +283,7 @@ draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_stat
   struct draw_llvm_sampler_soa *sampler;

   sampler = CALLOC_STRUCT(draw_llvm_sampler_soa);
-   if (!sampler)
+   if(!sampler)
      return NULL;

   sampler->base.destroy = draw_llvm_sampler_soa_destroy;
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -429,7 +429,7 @@ aaline_create_texture(struct aaline_stage *aaline)
                                PIPE_TRANSFER_WRITE,
                                &box, &transfer);

-      if (!data)
+      if (data == NULL)
         return FALSE;

      for (i = 0; i < size; i++) {
@@ -646,7 +646,6 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
   struct pipe_context *pipe = draw->pipe;
   const struct pipe_rasterizer_state *rast = draw->rasterizer;
   uint num_samplers;
-   uint num_sampler_views;
   void *r;

   assert(draw->rasterizer->line_smooth);
@@ -668,9 +667,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
   draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);

   /* how many samplers? */
-   /* we'll use sampler/texture[aaline->sampler_unit] for the alpha texture */
-   num_samplers = MAX2(aaline->num_samplers, aaline->fs->sampler_unit + 1);
-   num_sampler_views = MAX2(num_samplers, aaline->num_sampler_views);
+   /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
+   num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers);
+   num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1);

   aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso;
   pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit],
@@ -682,7 +681,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
                                      num_samplers, aaline->state.sampler);

   aaline->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                    num_sampler_views, aaline->state.sampler_views);
+                                    num_samplers, aaline->state.sampler_views);

   /* Disable triangle culling, stippling, unfilled mode etc. */
   r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
@@ -775,7 +774,7 @@ static struct aaline_stage *
 draw_aaline_stage(struct draw_context *draw)
 {
   struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage);
-   if (!aaline)
+   if (aaline == NULL)
      return NULL;

   aaline->stage.draw = draw;
@@ -794,7 +793,8 @@ draw_aaline_stage(struct draw_context *draw)
   return aaline;

 fail:
-   aaline->stage.destroy(&aaline->stage);
+   if (aaline)
+      aaline->stage.destroy(&aaline->stage);

   return NULL;
 }
@@ -824,12 +824,12 @@ aaline_create_fs_state(struct pipe_context *pipe,
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   struct aaline_fragment_shader *aafs = NULL;

-   if (!aaline)
+   if (aaline == NULL)
      return NULL;

   aafs = CALLOC_STRUCT(aaline_fragment_shader);

-   if (!aafs)
+   if (aafs == NULL)
      return NULL;

   aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
@@ -847,7 +847,7 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;

-   if (!aaline) {
+   if (aaline == NULL) {
      return;
   }

@@ -864,11 +864,11 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;

-   if (!aafs) {
+   if (aafs == NULL) {
      return;
   }

-   if (aaline) {
+   if (aaline != NULL) {
      /* pass-through */
      aaline->driver_delete_fs_state(pipe, aafs->driver_fs);

@@ -889,7 +889,7 @@ aaline_bind_sampler_states(struct pipe_context *pipe, unsigned shader,

   assert(start == 0);

-   if (!aaline) {
+   if (aaline == NULL) {
      return;
   }

@@ -912,7 +912,7 @@ aaline_set_sampler_views(struct pipe_context *pipe, unsigned shader,
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   uint i;

-   if (!aaline) {
+   if (aaline == NULL) {
      return;
   }

@@ -938,7 +938,7 @@ draw_aaline_prepare_outputs(struct draw_context *draw,
   const struct pipe_rasterizer_state *rast = draw->rasterizer;

   /* update vertex attrib info */
-   aaline->pos_slot = draw_current_shader_position_output(draw);
+   aaline->pos_slot = draw_current_shader_position_output(draw);;

   if (!rast->line_smooth)
      return;
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -662,7 +662,7 @@ static struct aapoint_stage *
 draw_aapoint_stage(struct draw_context *draw)
 {
   struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
-   if (!aapoint)
+   if (aapoint == NULL)
      goto fail;

   aapoint->stage.draw = draw;
@@ -707,7 +707,7 @@ aapoint_create_fs_state(struct pipe_context *pipe,
 {
   struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
   struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
-   if (!aafs)
+   if (aafs == NULL) 
      return NULL;

   aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
@@ -767,7 +767,7 @@ draw_install_aapoint_stage(struct draw_context *draw,
    * Create / install AA point drawing / prim stage
    */
   aapoint = draw_aapoint_stage( draw );
-   if (!aapoint)
+   if (aapoint == NULL)
      return FALSE;

   /* save original driver functions */
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -58,26 +58,19 @@
 struct clip_stage {
   struct draw_stage stage;      /**< base class */

-   unsigned pos_attr;
-   boolean have_clipdist;
-   int cv_attr;
+   /* List of the attributes to be flatshaded. */
+   uint num_flat_attribs;
+   uint flat_attribs[PIPE_MAX_SHADER_OUTPUTS];

-   /* List of the attributes to be constant interpolated. */
-   uint num_const_attribs;
-   uint8_t const_attribs[PIPE_MAX_SHADER_OUTPUTS];
-   /* List of the attributes to be linear interpolated. */
-   uint num_linear_attribs;
-   uint8_t linear_attribs[PIPE_MAX_SHADER_OUTPUTS];
-   /* List of the attributes to be perspective interpolated. */
-   uint num_perspect_attribs;
-   uint8_t perspect_attribs[PIPE_MAX_SHADER_OUTPUTS];
+   /* Mask of attributes in noperspective mode */
+   boolean noperspective_attribs[PIPE_MAX_SHADER_OUTPUTS];

   float (*plane)[4];
 };


 /** Cast wrapper */
-static inline struct clip_stage *clip_stage(struct draw_stage *stage)
+static inline struct clip_stage *clip_stage( struct draw_stage *stage )
 {
   return (struct clip_stage *)stage;
 }
@@ -103,10 +96,10 @@ draw_viewport_index(struct draw_context *draw,

 /* All attributes are float[4], so this is easy:
 */
-static void interp_attr(float dst[4],
-                        float t,
-                        const float in[4],
-                        const float out[4])
+static void interp_attr( float dst[4],
+			 float t,
+			 const float in[4],
+			 const float out[4] )
 {
   dst[0] = LINTERP( t, out[0], in[0] );
   dst[1] = LINTERP( t, out[1], in[1] );
@@ -118,28 +111,30 @@ static void interp_attr(float dst[4],
 /**
 * Copy flat shaded attributes src vertex to dst vertex.
 */
-static void copy_flat(struct draw_stage *stage,
-                      struct vertex_header *dst,
-                      const struct vertex_header *src)
+static void copy_flat( struct draw_stage *stage,
+                       struct vertex_header *dst,
+                       const struct vertex_header *src )
 {
   const struct clip_stage *clipper = clip_stage(stage);
   uint i;
-   for (i = 0; i < clipper->num_const_attribs; i++) {
-      const uint attr = clipper->const_attribs[i];
+   for (i = 0; i < clipper->num_flat_attribs; i++) {
+      const uint attr = clipper->flat_attribs[i];
      COPY_4FV(dst->data[attr], src->data[attr]);
   }
 }

 /* Interpolate between two vertices to produce a third.  
 */
-static void interp(const struct clip_stage *clip,
-                   struct vertex_header *dst,
-                   float t,
-                   const struct vertex_header *out,
-                   const struct vertex_header *in,
-                   unsigned viewport_index)
+static void interp( const struct clip_stage *clip,
+		    struct vertex_header *dst,
+		    float t,
+		    const struct vertex_header *out, 
+		    const struct vertex_header *in,
+                    unsigned viewport_index )
 {
-   const unsigned pos_attr = clip->pos_attr;
+   const unsigned nr_attrs = draw_num_shader_outputs(clip->stage.draw);
+   const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw);
+   const unsigned clip_attr = draw_current_shader_clipvertex_output(clip->stage.draw);
   unsigned j;
   float t_nopersp;

@@ -147,23 +142,20 @@ static void interp(const struct clip_stage *clip,
    */
   dst->clipmask = 0;
   dst->edgeflag = 0;        /* will get overwritten later */
-   dst->pad = 0;
+   dst->have_clipdist = in->have_clipdist;
   dst->vertex_id = UNDEFINED_VERTEX_ID;

   /* Interpolate the clip-space coords.
    */
-   if (clip->cv_attr >= 0) {
-      interp_attr(dst->data[clip->cv_attr], t,
-                  in->data[clip->cv_attr], out->data[clip->cv_attr]);
-   }
+   interp_attr(dst->clip, t, in->clip, out->clip);
   /* interpolate the clip-space position */
-   interp_attr(dst->clip_pos, t, in->clip_pos, out->clip_pos);
+   interp_attr(dst->pre_clip_pos, t, in->pre_clip_pos, out->pre_clip_pos);

   /* Do the projective divide and viewport transformation to get
    * new window coordinates:
    */
   {
-      const float *pos = dst->clip_pos;
+      const float *pos = dst->pre_clip_pos;
      const float *scale =
         clip->stage.draw->viewports[viewport_index].scale;
      const float *trans =
@@ -176,13 +168,6 @@ static void interp(const struct clip_stage *clip,
      dst->data[pos_attr][3] = oow;
   }
   
-
-   /* interp perspective attribs */
-   for (j = 0; j < clip->num_perspect_attribs; j++) {
-      const unsigned attr = clip->perspect_attribs[j];
-      interp_attr(dst->data[attr], t, in->data[attr], out->data[attr]);
-   }
-
   /**
    * Compute the t in screen-space instead of 3d space to use
    * for noperspective interpolation.
@@ -192,36 +177,43 @@ static void interp(const struct clip_stage *clip,
    * pick whatever value (the interpolated point won't be in front
    * anyway), so just use the 3d t.
    */
-   if (clip->num_linear_attribs) {
+   {
      int k;
      t_nopersp = t;
      /* find either in.x != out.x or in.y != out.y */
      for (k = 0; k < 2; k++) {
-         if (in->clip_pos[k] != out->clip_pos[k]) {
+         if (in->clip[k] != out->clip[k]) {
            /* do divide by W, then compute linear interpolation factor */
-            float in_coord = in->clip_pos[k] / in->clip_pos[3];
-            float out_coord = out->clip_pos[k] / out->clip_pos[3];
-            float dst_coord = dst->clip_pos[k] / dst->clip_pos[3];
+            float in_coord = in->clip[k] / in->clip[3];
+            float out_coord = out->clip[k] / out->clip[3];
+            float dst_coord = dst->clip[k] / dst->clip[3];
            t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
            break;
         }
      }
-      for (j = 0; j < clip->num_linear_attribs; j++) {
-         const unsigned attr = clip->linear_attribs[j];
-         interp_attr(dst->data[attr], t_nopersp, in->data[attr], out->data[attr]);
+   }
+
+   /* Other attributes
+    */
+   for (j = 0; j < nr_attrs; j++) {
+      if (j != pos_attr && j != clip_attr) {
+         if (clip->noperspective_attribs[j])
+            interp_attr(dst->data[j], t_nopersp, in->data[j], out->data[j]);
+         else
+            interp_attr(dst->data[j], t, in->data[j], out->data[j]);
      }
   }
 }

 /**
- * Checks whether the specified triangle is empty and if it is returns
+ * Checks whether the specifed triangle is empty and if it is returns
 * true, otherwise returns false.
- * Triangle is considered null/empty if its area is equal to zero.
+ * Triangle is considered null/empty if it's area is qual to zero.
 */
 static inline boolean
-is_tri_null(const struct clip_stage *clip, const struct prim_header *header)
+is_tri_null(struct draw_context *draw, const struct prim_header *header)
 {
-   const unsigned pos_attr = clip->pos_attr;
+   const unsigned pos_attr = draw_current_shader_position_output(draw);
   float x1 = header->v[1]->data[pos_attr][0] - header->v[0]->data[pos_attr][0];
   float y1 = header->v[1]->data[pos_attr][1] - header->v[0]->data[pos_attr][1];
   float z1 = header->v[1]->data[pos_attr][2] - header->v[0]->data[pos_attr][2];
@@ -241,13 +233,12 @@ is_tri_null(const struct clip_stage *clip, const struct prim_header *header)
 * Emit a post-clip polygon to the next pipeline stage.  The polygon
 * will be convex and the provoking vertex will always be vertex[0].
 */
-static void emit_poly(struct draw_stage *stage,
-                      struct vertex_header **inlist,
-                      const boolean *edgeflags,
-                      unsigned n,
-                      const struct prim_header *origPrim)
+static void emit_poly( struct draw_stage *stage,
+		       struct vertex_header **inlist,
+                       const boolean *edgeflags,
+		       unsigned n,
+		       const struct prim_header *origPrim)
 {
-   const struct clip_stage *clipper = clip_stage(stage);
   struct prim_header header;
   unsigned i;
   ushort edge_first, edge_middle, edge_last;
@@ -287,7 +278,7 @@ static void emit_poly(struct draw_stage *stage,
         header.v[2] = inlist[0];  /* the provoking vertex */
      }

-      tri_null = is_tri_null(clipper, &header);
+      tri_null = is_tri_null(stage->draw, &header);
      /* If we generated a triangle with an area, aka. non-null triangle,
       * or if the previous triangle was also null then skip all subsequent
       * null triangles */
@@ -312,18 +303,11 @@ static void emit_poly(struct draw_stage *stage,
         debug_printf("Clipped tri: (flat-shade-first = %d)\n",
                      stage->draw->rasterizer->flatshade_first);
         for (j = 0; j < 3; j++) {
-            debug_printf("  Vert %d: clip pos: %f %f %f %f\n", j,
-                         header.v[j]->clip_pos[0],
-                         header.v[j]->clip_pos[1],
-                         header.v[j]->clip_pos[2],
-                         header.v[j]->clip_pos[3]);
-            if (clipper->cv_attr >= 0) {
-               debug_printf("  Vert %d: cv: %f %f %f %f\n", j,
-                            header.v[j]->data[clipper->cv_attr][0],
-                            header.v[j]->data[clipper->cv_attr][1],
-                            header.v[j]->data[clipper->cv_attr][2],
-                            header.v[j]->data[clipper->cv_attr][3]);
-            }
+            debug_printf("  Vert %d: clip: %f %f %f %f\n", j,
+                         header.v[j]->clip[0],
+                         header.v[j]->clip[1],
+                         header.v[j]->clip[2],
+                         header.v[j]->clip[3]);
            for (k = 0; k < draw_num_shader_outputs(stage->draw); k++) {
               debug_printf("  Vert %d: Attr %d:  %f %f %f %f\n", j, k,
                            header.v[j]->data[k][0],
@@ -333,7 +317,7 @@ static void emit_poly(struct draw_stage *stage,
            }
         }
      }
-      stage->next->tri(stage->next, &header);
+      stage->next->tri( stage->next, &header );
   }
 }

@@ -358,28 +342,15 @@ static inline float getclipdist(const struct clip_stage *clipper,
 {
   const float *plane;
   float dp;
-   if (plane_idx < 6) {
-      /* ordinary xyz view volume clipping uses pos output */
-      plane = clipper->plane[plane_idx];
-      dp = dot4(vert->clip_pos, plane);
-   }
-   else if (clipper->have_clipdist) {
+   if (vert->have_clipdist && plane_idx >= 6) {
      /* pick the correct clipdistance element from the output vectors */
      int _idx = plane_idx - 6;
      int cdi = _idx >= 4;
      int vidx = cdi ? _idx - 4 : _idx;
      dp = vert->data[draw_current_shader_clipdistance_output(clipper->stage.draw, cdi)][vidx];
   } else {
-      /*
-       * legacy user clip planes or gl_ClipVertex
-       */
      plane = clipper->plane[plane_idx];
-      if (clipper->cv_attr >= 0) {
-         dp = dot4(vert->data[clipper->cv_attr], plane);
-      }
-      else {
-         dp = dot4(vert->clip_pos, plane);
-      }
+      dp = dot4(vert->clip, plane);
   }
   return dp;
 }
@@ -387,16 +358,15 @@ static inline float getclipdist(const struct clip_stage *clipper,
 /* Clip a triangle against the viewport and user clip planes.
 */
 static void
-do_clip_tri(struct draw_stage *stage,
-            struct prim_header *header,
-            unsigned clipmask)
+do_clip_tri( struct draw_stage *stage, 
+	     struct prim_header *header,
+	     unsigned clipmask )
 {
   struct clip_stage *clipper = clip_stage( stage );
   struct vertex_header *a[MAX_CLIPPED_VERTICES];
   struct vertex_header *b[MAX_CLIPPED_VERTICES];
   struct vertex_header **inlist = a;
   struct vertex_header **outlist = b;
-   struct vertex_header *prov_vertex;
   unsigned tmpnr = 0;
   unsigned n = 3;
   unsigned i;
@@ -410,38 +380,16 @@ do_clip_tri(struct draw_stage *stage,
   inlist[1] = header->v[1];
   inlist[2] = header->v[2];

-   /*
-    * For d3d10, we need to take this from the leading (first) vertex.
-    * For GL, we could do anything (as long as we advertize
-    * GL_UNDEFINED_VERTEX for the VIEWPORT_INDEX_PROVOKING_VERTEX query),
-    * but it needs to be consistent with what other parts (i.e. driver)
-    * will do, and that seems easier with GL_PROVOKING_VERTEX logic.
-    */
-   if (stage->draw->rasterizer->flatshade_first) {
-      prov_vertex = inlist[0];
-   }
-   else {
-      prov_vertex = inlist[2];
-   }
-   viewport_index = draw_viewport_index(clipper->stage.draw, prov_vertex);
+   viewport_index = draw_viewport_index(clipper->stage.draw, inlist[0]);

   if (DEBUG_CLIP) {
-      const float *v0 = header->v[0]->clip_pos;
-      const float *v1 = header->v[1]->clip_pos;
-      const float *v2 = header->v[2]->clip_pos;
-      debug_printf("Clip triangle pos:\n");
+      const float *v0 = header->v[0]->clip;
+      const float *v1 = header->v[1]->clip;
+      const float *v2 = header->v[2]->clip;
+      debug_printf("Clip triangle:\n");
      debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
      debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
      debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
-      if (clipper->cv_attr >= 0) {
-         const float *v0 = header->v[0]->data[clipper->cv_attr];
-         const float *v1 = header->v[1]->data[clipper->cv_attr];
-         const float *v2 = header->v[2]->data[clipper->cv_attr];
-         debug_printf("Clip triangle cv:\n");
-         debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
-         debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
-         debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
-      }
   }

   /*
@@ -477,7 +425,7 @@ do_clip_tri(struct draw_stage *stage,
      inEdges[n] = inEdges[0];

      for (i = 1; i <= n; i++) {
-         struct vertex_header *vert = inlist[i];
+	 struct vertex_header *vert = inlist[i];
         boolean *edge = &inEdges[i];

         float dp = getclipdist(clipper, vert, plane_idx);
@@ -485,16 +433,16 @@ do_clip_tri(struct draw_stage *stage,
         if (util_is_inf_or_nan(dp))
            return; //discard nan

-         if (dp_prev >= 0.0f) {
+	 if (dp_prev >= 0.0f) {
            assert(outcount < MAX_CLIPPED_VERTICES);
            if (outcount >= MAX_CLIPPED_VERTICES)
               return;
            outEdges[outcount] = *edge_prev;
-            outlist[outcount++] = vert_prev;
-         }
+	    outlist[outcount++] = vert_prev;
+	 }

-         if (DIFFERENT_SIGNS(dp, dp_prev)) {
-            struct vertex_header *new_vert;
+	 if (DIFFERENT_SIGNS(dp, dp_prev)) {
+	    struct vertex_header *new_vert;
            boolean *new_edge;

            assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
@@ -507,19 +455,19 @@ do_clip_tri(struct draw_stage *stage,
               return;

            new_edge = &outEdges[outcount];
-            outlist[outcount++] = new_vert;
+	    outlist[outcount++] = new_vert;

-            if (dp < 0.0f) {
-               /* Going out of bounds.  Avoid division by zero as we
-                * know dp != dp_prev from DIFFERENT_SIGNS, above.
-                */
-               float t = dp / (dp - dp_prev);
-               interp( clipper, new_vert, t, vert, vert_prev, viewport_index );
-
-               /* Whether or not to set edge flag for the new vert depends
+	    if (dp < 0.0f) {
+	       /* Going out of bounds.  Avoid division by zero as we
+		* know dp != dp_prev from DIFFERENT_SIGNS, above.
+		*/
+	       float t = dp / (dp - dp_prev);
+	       interp( clipper, new_vert, t, vert, vert_prev, viewport_index );
+	       
+	       /* Whether or not to set edge flag for the new vert depends
                * on whether it's a user-defined clipping plane.  We're
                * copying NVIDIA's behaviour here.
-                */
+		*/
               if (is_user_clip_plane) {
                  /* we want to see an edge along the clip plane */
                  *new_edge = TRUE;
@@ -530,31 +478,31 @@ do_clip_tri(struct draw_stage *stage,
                  *new_edge = *edge_prev;
                  new_vert->edgeflag = FALSE;
               }
-            }
+	    }
            else {
-               /* Coming back in.
-                */
-               float t = dp_prev / (dp_prev - dp);
-               interp( clipper, new_vert, t, vert_prev, vert, viewport_index );
+	       /* Coming back in.
+		*/
+	       float t = dp_prev / (dp_prev - dp);
+	       interp( clipper, new_vert, t, vert_prev, vert, viewport_index );

-               /* Copy starting vert's edgeflag:
-                */
-               new_vert->edgeflag = vert_prev->edgeflag;
+	       /* Copy starting vert's edgeflag:
+		*/
+	       new_vert->edgeflag = vert_prev->edgeflag;
               *new_edge = *edge_prev;
-            }
-         }
+	    }
+	 }

-         vert_prev = vert;
+	 vert_prev = vert;
         edge_prev = edge;
-         dp_prev = dp;
+	 dp_prev = dp;
      }

      /* swap in/out lists */
      {
-         struct vertex_header **tmp = inlist;
-         inlist = outlist;
-         outlist = tmp;
-         n = outcount;
+	 struct vertex_header **tmp = inlist;
+	 inlist = outlist;
+	 outlist = tmp;
+	 n = outcount;
      }
      {
         boolean *tmp = inEdges;
@@ -564,10 +512,10 @@ do_clip_tri(struct draw_stage *stage,

   }

-   /* If constant interpolated, copy provoking vertex attrib to polygon vertex[0]
+   /* If flat-shading, copy provoking vertex color to polygon vertex[0]
    */
   if (n >= 3) {
-      if (clipper->num_const_attribs) {
+      if (clipper->num_flat_attribs) {
         if (stage->draw->rasterizer->flatshade_first) {
            if (inlist[0] != header->v[0]) {
               assert(tmpnr < MAX_CLIPPED_VERTICES + 1);
@@ -587,10 +535,10 @@ do_clip_tri(struct draw_stage *stage,
            }
         }
      }
-
+      
      /* Emit the polygon as triangles to the setup stage:
       */
-      emit_poly(stage, inlist, inEdges, n, header);
+      emit_poly( stage, inlist, inEdges, n, header );
   }
 }

@@ -598,28 +546,17 @@ do_clip_tri(struct draw_stage *stage,
 /* Clip a line against the viewport and user clip planes.
 */
 static void
-do_clip_line(struct draw_stage *stage,
-             struct prim_header *header,
-             unsigned clipmask)
+do_clip_line( struct draw_stage *stage,
+	      struct prim_header *header,
+	      unsigned clipmask )
 {
-   const struct clip_stage *clipper = clip_stage(stage);
+   const struct clip_stage *clipper = clip_stage( stage );
   struct vertex_header *v0 = header->v[0];
   struct vertex_header *v1 = header->v[1];
-   struct vertex_header *prov_vertex;
   float t0 = 0.0F;
   float t1 = 0.0F;
   struct prim_header newprim;
-   int viewport_index;
-
-   newprim.flags = header->flags;
-
-   if (stage->draw->rasterizer->flatshade_first) {
-      prov_vertex = v0;
-   }
-   else {
-      prov_vertex = v1;
-   }
-   viewport_index = draw_viewport_index(clipper->stage.draw, prov_vertex);
+   int viewport_index = draw_viewport_index(clipper->stage.draw, v0);

   while (clipmask) {
      const unsigned plane_idx = ffs(clipmask)-1;
@@ -630,17 +567,17 @@ do_clip_line(struct draw_stage *stage,
         return; //discard nan

      if (dp1 < 0.0F) {
-         float t = dp1 / (dp1 - dp0);
+	 float t = dp1 / (dp1 - dp0);
         t1 = MAX2(t1, t);
      } 

      if (dp0 < 0.0F) {
-         float t = dp0 / (dp0 - dp1);
+	 float t = dp0 / (dp0 - dp1);
         t0 = MAX2(t0, t);
      }

      if (t0 + t1 >= 1.0F)
-         return; /* discard */
+	 return; /* discard */

      clipmask &= ~(1 << plane_idx);  /* turn off this plane's bit */
   }
@@ -678,7 +615,8 @@ do_clip_line(struct draw_stage *stage,


 static void
-clip_point(struct draw_stage *stage, struct prim_header *header)
+clip_point( struct draw_stage *stage, 
+            struct prim_header *header )
 {
   if (header->v[0]->clipmask == 0)
      stage->next->point( stage->next, header );
@@ -692,7 +630,8 @@ clip_point(struct draw_stage *stage, struct prim_header *header)
 * the guard band and not just outside the vp.)
 */
 static void
-clip_point_guard_xy(struct draw_stage *stage, struct prim_header *header)
+clip_point_guard_xy( struct draw_stage *stage,
+                     struct prim_header *header )
 {
   unsigned clipmask = header->v[0]->clipmask;
   if ((clipmask & 0xffffffff) == 0)
@@ -708,9 +647,9 @@ clip_point_guard_xy(struct draw_stage *stage, struct prim_header *header)
          * automatically). These would usually be captured by depth clip
          * too but this can be disabled.
          */
-         if (header->v[0]->clip_pos[3] <= 0.0f ||
-             util_is_inf_or_nan(header->v[0]->clip_pos[0]) ||
-             util_is_inf_or_nan(header->v[0]->clip_pos[1]))
+         if (header->v[0]->clip[3] <= 0.0f ||
+             util_is_inf_or_nan(header->v[0]->clip[0]) ||
+             util_is_inf_or_nan(header->v[0]->clip[1]))
            return;
      }
      stage->next->point(stage->next, header);
@@ -719,7 +658,8 @@ clip_point_guard_xy(struct draw_stage *stage, struct prim_header *header)


 static void
-clip_first_point(struct draw_stage *stage, struct prim_header *header)
+clip_first_point( struct draw_stage *stage,
+                  struct prim_header *header )
 {
   stage->point = stage->draw->guard_band_points_xy ? clip_point_guard_xy : clip_point;
   stage->point(stage, header);
@@ -727,7 +667,8 @@ clip_first_point(struct draw_stage *stage, struct prim_header *header)


 static void
-clip_line(struct draw_stage *stage, struct prim_header *header)
+clip_line( struct draw_stage *stage,
+	   struct prim_header *header )
 {
   unsigned clipmask = (header->v[0]->clipmask | 
                        header->v[1]->clipmask);
@@ -745,7 +686,8 @@ clip_line(struct draw_stage *stage, struct prim_header *header)


 static void
-clip_tri(struct draw_stage *stage, struct prim_header *header)
+clip_tri( struct draw_stage *stage,
+          struct prim_header *header )
 {
   unsigned clipmask = (header->v[0]->clipmask | 
                        header->v[1]->clipmask | 
@@ -773,24 +715,12 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
   if (semantic_name == TGSI_SEMANTIC_COLOR ||
       semantic_name == TGSI_SEMANTIC_BCOLOR) {
      interp = indexed_interp[semantic_index];
-   } else if (semantic_name == TGSI_SEMANTIC_POSITION ||
-              semantic_name == TGSI_SEMANTIC_CLIPVERTEX) {
-      /* these inputs are handled specially always */
-      return -1;
   } else {
      /* Otherwise, search in the FS inputs, with a decent default
       * if we don't find it.
-       * This probably only matters for layer, vpindex, culldist, maybe
-       * front_face.
       */
      uint j;
-      if (semantic_name == TGSI_SEMANTIC_LAYER ||
-          semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
-         interp = TGSI_INTERPOLATE_CONSTANT;
-      }
-      else {
-         interp = TGSI_INTERPOLATE_PERSPECTIVE;
-      }
+      interp = TGSI_INTERPOLATE_PERSPECTIVE;
      if (fs) {
         for (j = 0; j < fs->info.num_inputs; j++) {
            if (semantic_name == fs->info.input_semantic_name[j] &&
@@ -808,23 +738,13 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
 * primitive that really requires clipping.
 */
 static void 
-clip_init_state(struct draw_stage *stage)
+clip_init_state( struct draw_stage *stage )
 {
-   struct clip_stage *clipper = clip_stage(stage);
+   struct clip_stage *clipper = clip_stage( stage );
   const struct draw_context *draw = stage->draw;
   const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
   const struct tgsi_shader_info *info = draw_get_shader_info(draw);
   uint i, j;
-   int indexed_interp[2];
-
-   clipper->pos_attr = draw_current_shader_position_output(draw);
-   clipper->have_clipdist = draw_current_shader_num_written_clipdistances(draw) > 0;
-   if (draw_current_shader_clipvertex_output(draw) != clipper->pos_attr) {
-      clipper->cv_attr = (int)draw_current_shader_clipvertex_output(draw);
-   }
-   else {
-      clipper->cv_attr = -1;
-   }

   /* We need to know for each attribute what kind of interpolation is
    * done on it (flat, smooth or noperspective).  But the information
@@ -845,6 +765,7 @@ clip_init_state(struct draw_stage *stage)
   /* First pick up the interpolation mode for
    * gl_Color/gl_SecondaryColor, with the correct default.
    */
+   int indexed_interp[2];
   indexed_interp[0] = indexed_interp[1] = draw->rasterizer->flatshade ?
      TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;

@@ -857,33 +778,29 @@ clip_init_state(struct draw_stage *stage)
      }
   }

-   /* Then resolve the interpolation mode for every output attribute. */
+   /* Then resolve the interpolation mode for every output attribute.
+    *
+    * Given how the rest of the code works, the most efficient way is to
+    * have a vector of flat-mode attributes, and a mask for
+    * noperspective attributes.
+    */

-   clipper->num_const_attribs = 0;
-   clipper->num_linear_attribs = 0;
-   clipper->num_perspect_attribs = 0;
+   clipper->num_flat_attribs = 0;
+   memset(clipper->noperspective_attribs, 0, sizeof(clipper->noperspective_attribs));
   for (i = 0; i < info->num_outputs; i++) {
      /* Find the interpolation mode for a specific attribute */
      int interp = find_interp(fs, indexed_interp,
                               info->output_semantic_name[i],
                               info->output_semantic_index[i]);
-      switch (interp) {
-      case TGSI_INTERPOLATE_CONSTANT:
-         clipper->const_attribs[clipper->num_const_attribs] = i;
-         clipper->num_const_attribs++;
-         break;
-      case TGSI_INTERPOLATE_LINEAR:
-         clipper->linear_attribs[clipper->num_linear_attribs] = i;
-         clipper->num_linear_attribs++;
-         break;
-      case TGSI_INTERPOLATE_PERSPECTIVE:
-         clipper->perspect_attribs[clipper->num_perspect_attribs] = i;
-         clipper->num_perspect_attribs++;
-         break;
-      default:
-         assert(interp == -1);
-         break;
-      }
+      /* If it's flat, add it to the flat vector.  Otherwise update
+       * the noperspective mask.
+       */
+
+      if (interp == TGSI_INTERPOLATE_CONSTANT) {
+         clipper->flat_attribs[clipper->num_flat_attribs] = i;
+         clipper->num_flat_attribs++;
+      } else
+         clipper->noperspective_attribs[i] = interp == TGSI_INTERPOLATE_LINEAR;
   }
   /* Search the extra vertex attributes */
   for (j = 0; j < draw->extra_shader_outputs.num; j++) {
@@ -891,47 +808,39 @@ clip_init_state(struct draw_stage *stage)
      int interp = find_interp(fs, indexed_interp,
                               draw->extra_shader_outputs.semantic_name[j],
                               draw->extra_shader_outputs.semantic_index[j]);
-      switch (interp) {
-      case TGSI_INTERPOLATE_CONSTANT:
-         clipper->const_attribs[clipper->num_const_attribs] = i + j;
-         clipper->num_const_attribs++;
-         break;
-      case TGSI_INTERPOLATE_LINEAR:
-         clipper->linear_attribs[clipper->num_linear_attribs] = i + j;
-         clipper->num_linear_attribs++;
-         break;
-      case TGSI_INTERPOLATE_PERSPECTIVE:
-         clipper->perspect_attribs[clipper->num_perspect_attribs] = i + j;
-         clipper->num_perspect_attribs++;
-         break;
-      default:
-         assert(interp == -1);
-         break;
-      }
+      /* If it's flat, add it to the flat vector.  Otherwise update
+       * the noperspective mask.
+       */
+      if (interp == TGSI_INTERPOLATE_CONSTANT) {
+         clipper->flat_attribs[clipper->num_flat_attribs] = i + j;
+         clipper->num_flat_attribs++;
+      } else
+         clipper->noperspective_attribs[i + j] = interp == TGSI_INTERPOLATE_LINEAR;
   }
-
+   
   stage->tri = clip_tri;
   stage->line = clip_line;
 }



-static void clip_first_tri(struct draw_stage *stage,
-                           struct prim_header *header)
+static void clip_first_tri( struct draw_stage *stage,
+			    struct prim_header *header )
 {
   clip_init_state( stage );
   stage->tri( stage, header );
 }

-static void clip_first_line(struct draw_stage *stage,
-                            struct prim_header *header)
+static void clip_first_line( struct draw_stage *stage,
+			     struct prim_header *header )
 {
   clip_init_state( stage );
   stage->line( stage, header );
 }


-static void clip_flush(struct draw_stage *stage, unsigned flags)
+static void clip_flush( struct draw_stage *stage, 
+			     unsigned flags )
 {
   stage->tri = clip_first_tri;
   stage->line = clip_first_line;
@@ -939,13 +848,13 @@ static void clip_flush(struct draw_stage *stage, unsigned flags)
 }


-static void clip_reset_stipple_counter(struct draw_stage *stage)
+static void clip_reset_stipple_counter( struct draw_stage *stage )
 {
   stage->next->reset_stipple_counter( stage->next );
 }


-static void clip_destroy(struct draw_stage *stage)
+static void clip_destroy( struct draw_stage *stage )
 {
   draw_free_temp_verts( stage );
   FREE( stage );
@@ -956,10 +865,10 @@ static void clip_destroy(struct draw_stage *stage)
 * Allocate a new clipper stage.
 * \return pointer to new stage object
 */
-struct draw_stage *draw_clip_stage(struct draw_context *draw)
+struct draw_stage *draw_clip_stage( struct draw_context *draw )
 {
   struct clip_stage *clipper = CALLOC_STRUCT(clip_stage);
-   if (!clipper)
+   if (clipper == NULL)
      goto fail;

   clipper->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -251,7 +251,7 @@ static void cull_destroy( struct draw_stage *stage )
 struct draw_stage *draw_cull_stage( struct draw_context *draw )
 {
   struct cull_stage *cull = CALLOC_STRUCT(cull_stage);
-   if (!cull)
+   if (cull == NULL)
      goto fail;

   cull->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -309,7 +309,7 @@ static void flatshade_destroy( struct draw_stage *stage )
 struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
 {
   struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
-   if (!flatshade)
+   if (flatshade == NULL)
      goto fail;

   flatshade->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -231,7 +231,7 @@ static void offset_destroy( struct draw_stage *stage )
 struct draw_stage *draw_offset_stage( struct draw_context *draw )
 {
   struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
-   if (!offset)
+   if (offset == NULL)
      goto fail;

   offset->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -43,10 +43,10 @@
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
-#include "util/u_pstipple.h"
 #include "util/u_sampler.h"

 #include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"

 #include "draw_context.h"
 #include "draw_pipe.h"
@@ -114,6 +114,178 @@ struct pstip_stage
 };


+
+/**
+ * Subclass of tgsi_transform_context, used for transforming the
+ * user's fragment shader to add the extra texture sample and fragment kill
+ * instructions.
+ */
+struct pstip_transform_context {
+   struct tgsi_transform_context base;  /**< callbacks cast the base pointer back to this struct */
+   uint tempsUsed;  /**< bitmask of declared temporary registers */
+   int wincoordInput;  /**< first index of the POSITION input, negative if none was seen */
+   int maxInput;  /**< highest input register index declared so far */
+   uint samplersUsed;  /**< bitfield of samplers used */
+   bool hasSview;  /**< true once a SAMPLER_VIEW declaration was seen */
+   int freeSampler;  /**< an available sampler for the pstipple */
+   int texTemp;  /**< temp register used for the stipple texcoord and texel */
+   int numImmed;  /**< number of immediates passed through so far */
+};
+
+
+/**
+ * TGSI declaration transform callback.
+ * Records the samplers, inputs and temp regs the shader already declares.
+ */
+static void
+pstip_transform_decl(struct tgsi_transform_context *ctx,
+                     struct tgsi_full_declaration *decl)
+{
+   struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
+
+   if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+      uint i;
+      for (i = decl->Range.First;   /* unsigned shift: bit 31 must not overflow */
+           i <= decl->Range.Last && i < 32; i++) {
+         pctx->samplersUsed |= 1u << i;
+      }
+   }
+   else if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
+      pctx->hasSview = true;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
+      if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
+         pctx->wincoordInput = (int) decl->Range.First;
+   }
+   else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      uint i;
+      for (i = decl->Range.First;   /* only 32 temps fit in the bitmask */
+           i <= decl->Range.Last && i < 32; i++) {
+         pctx->tempsUsed |= 1u << i;
+      }
+   }
+
+   ctx->emit_declaration(ctx, decl);   /* pass the declaration through unchanged */
+}
+
+
+/**
+ * TGSI immediate transform callback: passes the immediate through to the
+ * output shader unchanged and counts it in the transform context.
+ */
+static void
+pstip_transform_immed(struct tgsi_transform_context *ctx,
+                      struct tgsi_full_immediate *immed)
+{
+   struct pstip_transform_context *stipple_ctx;
+   stipple_ctx = (struct pstip_transform_context *) ctx;
+   ctx->emit_immediate(ctx, immed);
+   stipple_ctx->numImmed += 1;
+}
+
+
+/** Index of the lowest clear bit in \p bitfield; -1 if all bits are set. */
+static int
+free_bit(uint bitfield)
+{
+   const uint clear_bits = ~bitfield;   /* set bit == free slot */
+
+   return ffs(clear_bits) - 1;          /* ffs() is 1-based, 0 if none */
+}
+
+
+/**
+ * TGSI transform prolog callback.
+ * Declares the stipple sampler/temp (and a POSITION input if the shader has
+ * none) and emits the MUL/TEX/KILL_IF sequence ahead of the user's code.
+ */
+static void
+pstip_transform_prolog(struct tgsi_transform_context *ctx)
+{
+   struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx;
+   uint i;
+   int wincoordInput;
+
+   /* find free sampler; free_bit() yields -1 when all 32 bits are taken,
+    * so fall back to the last sampler for both out-of-range cases */
+   pctx->freeSampler = free_bit(pctx->samplersUsed);
+   if (pctx->freeSampler < 0 || pctx->freeSampler >= PIPE_MAX_SAMPLERS)
+      pctx->freeSampler = PIPE_MAX_SAMPLERS - 1;
+
+   /* reuse the shader's POSITION input, else append a new one */
+   if (pctx->wincoordInput < 0)
+      wincoordInput = pctx->maxInput + 1;
+   else
+      wincoordInput = pctx->wincoordInput;
+
+   /* find one free temp reg: the lowest clear bit of tempsUsed.  Use an
+    * unsigned shift so that testing bit 31 is well defined. */
+   for (i = 0; i < 32 && pctx->texTemp < 0; i++) {
+      if ((pctx->tempsUsed & (1u << i)) == 0)
+         pctx->texTemp = i;
+   }
+   assert(pctx->texTemp >= 0);
+
+   if (pctx->wincoordInput < 0) {
+      /* declare new position input reg */
+      tgsi_transform_input_decl(ctx, wincoordInput,
+                                TGSI_SEMANTIC_POSITION, 1,
+                                TGSI_INTERPOLATE_LINEAR);
+   }
+
+   /* declare new sampler */
+   tgsi_transform_sampler_decl(ctx, pctx->freeSampler);
+
+   /* if the src shader has SVIEW decl's for each SAMP decl, we
+    * need to continue the trend and ensure there is a matching
+    * SVIEW for the new SAMP we just created
+    */
+   if (pctx->hasSview) {
+      tgsi_transform_sampler_view_decl(ctx,
+                                       pctx->freeSampler,
+                                       TGSI_TEXTURE_2D,
+                                       TGSI_RETURN_TYPE_FLOAT);
+   }
+
+   /* declare new temp reg */
+   tgsi_transform_temp_decl(ctx, pctx->texTemp);
+
+   /* emit immediate = {1/32, 1/32, 1, 1}
+    * The index/position of this immediate will be pctx->numImmed
+    */
+   tgsi_transform_immediate_decl(ctx, 1.0/32.0, 1.0/32.0, 1.0, 1.0);
+
+   /*
+    * Insert new MUL/TEX/KILL_IF instructions at start of program
+    * Take gl_FragCoord, divide by 32 (stipple size), sample the
+    * texture and kill fragment if needed.
+    *
+    * We'd like to use non-normalized texcoords to index into a RECT
+    * texture, but we can only use GL_REPEAT wrap mode with normalized
+    * texcoords.  Darn.
+    */
+
+   /* MUL texTemp, INPUT[wincoord], 1/32; */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_TEMPORARY, pctx->texTemp,
+                           TGSI_WRITEMASK_XYZW,
+                           TGSI_FILE_INPUT, wincoordInput,
+                           TGSI_FILE_IMMEDIATE, pctx->numImmed);
+
+   /* TEX texTemp, texTemp, sampler; */
+   tgsi_transform_tex_2d_inst(ctx,
+                              TGSI_FILE_TEMPORARY, pctx->texTemp,
+                              TGSI_FILE_TEMPORARY, pctx->texTemp,
+                              pctx->freeSampler);
+
+   /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
+   tgsi_transform_kill_inst(ctx,
+                            TGSI_FILE_TEMPORARY, pctx->texTemp,
+                            TGSI_SWIZZLE_W, TRUE);
+}
+
+
+
 /**
 * Generate the frag shader we'll use for doing polygon stipple.
 * This will be the user's shader prefixed with a TEX and KIL instruction.
@@ -121,27 +293,40 @@ struct pstip_stage
 static boolean
 generate_pstip_fs(struct pstip_stage *pstip)
 {
-   struct pipe_context *pipe = pstip->pipe;
-   struct pipe_screen *screen = pipe->screen;
   const struct pipe_shader_state *orig_fs = &pstip->fs->state;
   /*struct draw_context *draw = pstip->stage.draw;*/
   struct pipe_shader_state pstip_fs;
-   enum tgsi_file_type wincoord_file;
-
-   wincoord_file = screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL) ?
-                   TGSI_FILE_SYSTEM_VALUE : TGSI_FILE_INPUT;
+   struct pstip_transform_context transform;
+   const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;

   pstip_fs = *orig_fs; /* copy to init */
-   pstip_fs.tokens = util_pstipple_create_fragment_shader(orig_fs->tokens,
-                                                          &pstip->fs->sampler_unit,
-                                                          0,
-                                                          wincoord_file);
+   pstip_fs.tokens = tgsi_alloc_tokens(newLen);
   if (pstip_fs.tokens == NULL)
      return FALSE;

+   memset(&transform, 0, sizeof(transform));
+   transform.wincoordInput = -1;
+   transform.maxInput = -1;
+   transform.texTemp = -1;
+   transform.base.prolog = pstip_transform_prolog;
+   transform.base.transform_declaration = pstip_transform_decl;
+   transform.base.transform_immediate = pstip_transform_immed;
+
+   tgsi_transform_shader(orig_fs->tokens,
+                         (struct tgsi_token *) pstip_fs.tokens,
+                         newLen, &transform.base);
+
+#if 0 /* DEBUG */
+   tgsi_dump(orig_fs->tokens, 0);
+   tgsi_dump(pstip_fs.tokens, 0);
+#endif
+
+   assert(pstip->fs);
+
+   pstip->fs->sampler_unit = transform.freeSampler;
   assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS);

-   pstip->fs->pstip_fs = pstip->driver_create_fs_state(pipe, &pstip_fs);
+   pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs);
   
   FREE((void *)pstip_fs.tokens);

@@ -152,6 +337,113 @@ generate_pstip_fs(struct pstip_stage *pstip)
 }


+/**
+ * Load texture image with current stipple pattern.
+ * Does nothing if the stipple texture cannot be mapped.
+ */
+static void
+pstip_update_texture(struct pstip_stage *pstip)
+{
+   static const uint bit31 = 1u << 31;   /* unsigned: 1 << 31 overflows int */
+   struct pipe_context *pipe = pstip->pipe;
+   struct pipe_transfer *transfer;
+   const uint *stipple = pstip->state.stipple->stipple;
+   uint i, j;
+   ubyte *data;
+
+   data = pipe_transfer_map(pipe, pstip->texture, 0, 0,
+                            PIPE_TRANSFER_WRITE, 0, 0, 32, 32, &transfer);
+   if (!data)
+      return;   /* mapping failed: nothing to write to or unmap */
+
+   /*
+    * Load alpha texture.
+    * Note: 0 means keep the fragment, 255 means kill it.
+    * We'll negate the texel value and use KILL_IF which kills if value
+    * is negative.
+    */
+   for (i = 0; i < 32; i++) {
+      /* row i of the pattern; bit 31 maps to texel column 0 */
+      ubyte *row = data + i * transfer->stride;
+
+      for (j = 0; j < 32; j++) {
+         /* stipple bit set: fragment "on" (0), else "off" (255) */
+         row[j] = (stipple[i] & (bit31 >> j)) ? 0 : 255;
+      }
+   }
+
+   /* unmap */
+   pipe_transfer_unmap(pipe, transfer);
+}
+
+
+/**
+ * Allocate the 32x32 A8 texture that holds the stipple pattern, together
+ * with the sampler view used to bind it.  Returns FALSE if either fails.
+ */
+static boolean
+pstip_create_texture(struct pstip_stage *pstip)
+{
+   struct pipe_context *ctx = pstip->pipe;
+   struct pipe_screen *scr = ctx->screen;
+   struct pipe_sampler_view view_templ;
+   struct pipe_resource res_templ;
+
+   /* describe a single-level 32x32x1 2D texture usable as a sampler view */
+   memset(&res_templ, 0, sizeof(res_templ));
+   res_templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   res_templ.target = PIPE_TEXTURE_2D;
+   res_templ.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
+   res_templ.width0 = 32;
+   res_templ.height0 = 32;
+   res_templ.depth0 = 1;
+   res_templ.array_size = 1;
+   res_templ.last_level = 0;
+
+   pstip->texture = scr->resource_create(scr, &res_templ);
+   if (!pstip->texture)
+      return FALSE;
+
+   /* view template derived from the texture, using the texture's own format */
+   u_sampler_view_default_template(&view_templ, pstip->texture,
+                                   pstip->texture->format);
+   pstip->sampler_view =
+      ctx->create_sampler_view(ctx, pstip->texture, &view_templ);
+   if (pstip->sampler_view == NULL)
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/**
+ * Build the sampler state object for the stipple texture: repeat wrapping,
+ * nearest filtering, no mipmapping.  Returns FALSE on failure.
+ */
+static boolean
+pstip_create_sampler(struct pstip_stage *pstip)
+{
+   struct pipe_context *ctx = pstip->pipe;
+   struct pipe_sampler_state templ;
+
+   memset(&templ, 0, sizeof(templ));
+   /* normalized coords with REPEAT wrapping in every direction */
+   templ.normalized_coords = 1;
+   templ.wrap_s = PIPE_TEX_WRAP_REPEAT;
+   templ.wrap_t = PIPE_TEX_WRAP_REPEAT;
+   templ.wrap_r = PIPE_TEX_WRAP_REPEAT;
+   /* nearest filtering, mip filtering disabled, LOD pinned to 0 */
+   templ.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   templ.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   templ.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   templ.min_lod = 0.0f;
+   templ.max_lod = 0.0f;
+
+   pstip->sampler_cso = ctx->create_sampler_state(ctx, &templ);
+   return pstip->sampler_cso ? TRUE : FALSE;
+}
+
+
 /**
 * When we're about to draw our first stipple polygon in a batch, this function
 * is called to tell the driver to bind our modified fragment shader.
@@ -185,7 +477,6 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
   struct pipe_context *pipe = pstip->pipe;
   struct draw_context *draw = stage->draw;
   uint num_samplers;
-   uint num_sampler_views;

   assert(stage->draw->rasterizer->poly_stipple_enable);

@@ -199,8 +490,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)

   /* how many samplers? */
   /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
-   num_samplers = MAX2(pstip->num_samplers, pstip->fs->sampler_unit + 1);
-   num_sampler_views = MAX2(pstip->num_sampler_views, num_samplers);
+   num_samplers = MAX2(pstip->num_sampler_views, pstip->num_samplers);
+   num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1);

   /* plug in our sampler, texture */
   pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso;
@@ -215,7 +506,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
                                     num_samplers, pstip->state.samplers);

   pstip->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
-                                   num_sampler_views, pstip->state.sampler_views);
+                                   num_samplers, pstip->state.sampler_views);

   draw->suspend_flushing = FALSE;

@@ -286,7 +577,7 @@ static struct pstip_stage *
 draw_pstip_stage(struct draw_context *draw, struct pipe_context *pipe)
 {
   struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage);
-   if (!pstip)
+   if (pstip == NULL)
      goto fail;

   pstip->pipe = pipe;
@@ -430,8 +721,7 @@ pstip_set_polygon_stipple(struct pipe_context *pipe,
   /* pass-through */
   pstip->driver_set_polygon_stipple(pstip->pipe, stipple);

-   util_pstipple_update_stipple_texture(pstip->pipe, pstip->texture,
-                                        pstip->state.stipple->stipple);
+   pstip_update_texture(pstip);
 }


@@ -452,7 +742,7 @@ draw_install_pstipple_stage(struct draw_context *draw,
    * Create / install pgon stipple drawing / prim stage
    */
   pstip = draw_pstip_stage( draw, pipe );
-   if (!pstip)
+   if (pstip == NULL)
      goto fail;

   draw->pipeline.pstipple = &pstip->stage;
@@ -467,17 +757,10 @@ draw_install_pstipple_stage(struct draw_context *draw,
   pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple;

   /* create special texture, sampler state */
-   pstip->texture = util_pstipple_create_stipple_texture(pipe, NULL);
-   if (!pstip->texture)
+   if (!pstip_create_texture(pstip))
      goto fail;

-   pstip->sampler_view = util_pstipple_create_sampler_view(pipe,
-                                                           pstip->texture);
-   if (!pstip->sampler_view)
-      goto fail;
-
-   pstip->sampler_cso = util_pstipple_create_sampler(pipe);
-   if (!pstip->sampler_cso)
+   if (!pstip_create_sampler(pstip))
      goto fail;

   /* override the driver's functions */
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -235,7 +235,7 @@ stipple_destroy( struct draw_stage *stage )
 struct draw_stage *draw_stipple_stage( struct draw_context *draw )
 {
   struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
-   if (!stipple)
+   if (stipple == NULL)
      goto fail;

   stipple->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -165,7 +165,7 @@ static void twoside_destroy( struct draw_stage *stage )
 struct draw_stage *draw_twoside_stage( struct draw_context *draw )
 {
   struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
-   if (!twoside)
+   if (twoside == NULL)
      goto fail;

   twoside->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -86,33 +86,27 @@ inject_front_face_info(struct draw_stage *stage,
 }

   
-static void point(struct draw_stage *stage,
-                  struct prim_header *header,
-                  struct vertex_header *v0)
+static void point( struct draw_stage *stage,
+		   struct vertex_header *v0 )
 {
   struct prim_header tmp;
-   tmp.det = header->det;
-   tmp.flags = 0;
   tmp.v[0] = v0;
-   stage->next->point(stage->next, &tmp);
+   stage->next->point( stage->next, &tmp );
 }

-static void line(struct draw_stage *stage,
-                 struct prim_header *header,
-                 struct vertex_header *v0,
-                 struct vertex_header *v1)
+static void line( struct draw_stage *stage,
+		  struct vertex_header *v0,
+		  struct vertex_header *v1 )
 {
   struct prim_header tmp;
-   tmp.det = header->det;
-   tmp.flags = 0;
   tmp.v[0] = v0;
   tmp.v[1] = v1;
-   stage->next->line(stage->next, &tmp);
+   stage->next->line( stage->next, &tmp );
 }


-static void points(struct draw_stage *stage,
-                   struct prim_header *header)
+static void points( struct draw_stage *stage,
+		    struct prim_header *header )
 {
   struct vertex_header *v0 = header->v[0];
   struct vertex_header *v1 = header->v[1];
@@ -120,41 +114,27 @@ static void points(struct draw_stage *stage,

   inject_front_face_info(stage, header);

-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
-      point(stage, header, v0);
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
-      point(stage, header, v1);
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
-      point(stage, header, v2);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 );
 }


-static void lines(struct draw_stage *stage,
-                  struct prim_header *header)
+static void lines( struct draw_stage *stage,
+		   struct prim_header *header )
 {
   struct vertex_header *v0 = header->v[0];
   struct vertex_header *v1 = header->v[1];
   struct vertex_header *v2 = header->v[2];

   if (header->flags & DRAW_PIPE_RESET_STIPPLE)
-      /*
-       * XXX could revisit this. The only stage which cares is the line
-       * stipple stage. Could just emit correct reset flags here and not
-       * bother about all the calling through reset_stipple_counter
-       * stages. Though technically it is necessary if line stipple is
-       * handled by the driver, but this is not actually hooked up when
-       * using vbuf (vbuf stage reset_stipple_counter does nothing).
-       */
-      stage->next->reset_stipple_counter(stage->next);
+      stage->next->reset_stipple_counter( stage->next );

   inject_front_face_info(stage, header);

-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag)
-      line(stage, header, v2, v0);
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag)
-      line(stage, header, v0, v1);
-   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag)
-      line(stage, header, v1, v2);
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 );
+   if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 );
 }


@@ -275,7 +255,7 @@ draw_unfilled_prepare_outputs( struct draw_context *draw,
 struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
 {
   struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage);
-   if (!unfilled)
+   if (unfilled == NULL)
      goto fail;

   unfilled->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_util.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_util.c
@@ -78,7 +78,7 @@ boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
      unsigned i;
      ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );

-      if (!store)
+      if (store == NULL)
         return FALSE;

      stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
--- a/src/gallium/auxiliary/draw/draw_pipe_validate.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c
@@ -326,7 +326,7 @@ static void validate_destroy( struct draw_stage *stage )
 struct draw_stage *draw_validate_stage( struct draw_context *draw )
 {
   struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
-   if (!stage)
+   if (stage == NULL)
      return NULL;

   stage->draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -74,10 +74,9 @@ struct vbuf_stage {
   unsigned max_indices;
   unsigned nr_indices;

-   /* Cache point size somewhere its address won't change:
+   /* Cache point size somewhere its address won't change:
    */
   float point_size;
-   float zero4[4];

   struct translate_cache *cache;
 };
@@ -206,7 +205,6 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
   struct translate_key hw_key;
   unsigned dst_offset;
   unsigned i;
-   const struct vertex_info *vinfo;

   vbuf->render->set_primitive(vbuf->render, prim);

@@ -217,33 +215,27 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
    * state change.
    */
   vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render);
-   vinfo = vbuf->vinfo;
-   vbuf->vertex_size = vinfo->size * sizeof(float);
+   vbuf->vertex_size = vbuf->vinfo->size * sizeof(float);

   /* Translate from pipeline vertices to hw vertices.
    */
   dst_offset = 0;

-   for (i = 0; i < vinfo->num_attribs; i++) {
+   for (i = 0; i < vbuf->vinfo->num_attribs; i++) {
      unsigned emit_sz = 0;
      unsigned src_buffer = 0;
      enum pipe_format output_format;
-      unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
+      unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) );

-      output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
-      emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
+      output_format = draw_translate_vinfo_format(vbuf->vinfo->attrib[i].emit);
+      emit_sz = draw_translate_vinfo_size(vbuf->vinfo->attrib[i].emit);

      /* doesn't handle EMIT_OMIT */
      assert(emit_sz != 0);

-      if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
-         src_buffer = 1;
-         src_offset = 0;
-      }
-      else if (vinfo->attrib[i].src_index == DRAW_ATTR_NONEXIST) {
-         /* elements which don't exist will get assigned zeros */
-         src_buffer = 2;
-         src_offset = 0;
+      if (vbuf->vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
+	 src_buffer = 1;
+	 src_offset = 0;
      }

      hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -257,7 +249,7 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
      dst_offset += emit_sz;
   }

-   hw_key.nr_elements = vinfo->num_attribs;
+   hw_key.nr_elements = vbuf->vinfo->num_attribs;
   hw_key.output_stride = vbuf->vertex_size;

   /* Don't bother with caching at this stage:
@@ -269,7 +261,6 @@ vbuf_start_prim( struct vbuf_stage *vbuf, uint prim )
      vbuf->translate = translate_cache_find(vbuf->cache, &hw_key);

      vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0, ~0);
-      vbuf->translate->set_buffer(vbuf->translate, 2, &vbuf->zero4[0], 0, ~0);
   }

   vbuf->point_size = vbuf->stage.draw->rasterizer->point_size;
@@ -435,9 +426,9 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
                                    struct vbuf_render *render )
 {
   struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
-   if (!vbuf)
+   if (vbuf == NULL)
      goto fail;
-
+   
   vbuf->stage.draw = draw;
   vbuf->stage.name = "vbuf";
   vbuf->stage.point = vbuf_first_point;
@@ -446,30 +437,29 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
   vbuf->stage.flush = vbuf_flush;
   vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
   vbuf->stage.destroy = vbuf_destroy;
-
+   
   vbuf->render = render;
   vbuf->max_indices = MIN2(render->max_indices, UNDEFINED_VERTEX_ID-1);

-   vbuf->indices = (ushort *) align_malloc(vbuf->max_indices *
-                    sizeof(vbuf->indices[0]),
-                    16);
+   vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * 
+					    sizeof(vbuf->indices[0]), 
+					    16 );
   if (!vbuf->indices)
      goto fail;

   vbuf->cache = translate_cache_create();
-   if (!vbuf->cache)
+   if (!vbuf->cache) 
      goto fail;
-
+      
+   
   vbuf->vertices = NULL;
   vbuf->vertex_ptr = vbuf->vertices;
-
-   vbuf->zero4[0] = vbuf->zero4[1] = vbuf->zero4[2] = vbuf->zero4[3] = 0.0f;
-
+   
   return &vbuf->stage;

-fail:
+ fail:
   if (vbuf)
      vbuf_destroy(&vbuf->stage);
-
+   
   return NULL;
 }
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -202,7 +202,7 @@ static void wideline_destroy( struct draw_stage *stage )
 struct draw_stage *draw_wide_line_stage( struct draw_context *draw )
 {
   struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage);
-   if (!wide)
+   if (wide == NULL)
      goto fail;

   wide->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -315,7 +315,7 @@ static void widepoint_destroy( struct draw_stage *stage )
 struct draw_stage *draw_wide_point_stage( struct draw_context *draw )
 {
   struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage);
-   if (!wide)
+   if (wide == NULL)
      goto fail;

   wide->stage.draw = draw;
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -86,10 +86,11 @@ struct draw_vertex_buffer {
 struct vertex_header {
   unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
   unsigned edgeflag:1;
-   unsigned pad:1;
+   unsigned have_clipdist:1;
   unsigned vertex_id:16;

-   float clip_pos[4];
+   float clip[4];
+   float pre_clip_pos[4];

   /* This will probably become float (*data)[4] soon:
    */
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -109,7 +109,7 @@ draw_pt_arrays(struct draw_context *draw,

   frontend = draw->pt.frontend;

-   if (frontend) {
+   if (frontend ) {
      if (draw->pt.prim != prim || draw->pt.opt != opt) {
         /* In certain conditions switching primitives requires us to flush
          * and validate the different stages. One example is when smooth
@@ -524,7 +524,7 @@ draw_vbo(struct draw_context *draw,
 #endif
   {
      if (index_limit == 0) {
-         /* one of the buffers is too small to do any valid drawing */
+      /* one of the buffers is too small to do any valid drawing */
         debug_warning("draw: VBO too small to draw anything\n");
         util_fpstate_set(fpstate);
         return;
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -44,9 +44,6 @@ struct pt_emit {
   unsigned prim;

   const struct vertex_info *vinfo;
-
-   float zero4[4];
-
 };


@@ -63,7 +60,7 @@ draw_pt_emit_prepare(struct pt_emit *emit,

   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
    */
-   draw_do_flush(draw, DRAW_FLUSH_BACKEND);
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   /* XXX: may need to defensively reset this later on as clipping can
    * clobber this state in the render backend.
@@ -83,7 +80,7 @@ draw_pt_emit_prepare(struct pt_emit *emit,
      unsigned emit_sz = 0;
      unsigned src_buffer = 0;
      unsigned output_format;
-      unsigned src_offset = vinfo->attrib[i].src_index * 4 * sizeof(float);
+      unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );

      output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
      emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
@@ -92,13 +89,8 @@ draw_pt_emit_prepare(struct pt_emit *emit,
      assert(emit_sz != 0);

      if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
-         src_buffer = 1;
-         src_offset = 0;
-      }
-      else if (vinfo->attrib[i].src_index == DRAW_ATTR_NONEXIST) {
-         /* elements which don't exist will get assigned zeros */
-         src_buffer = 2;
-         src_offset = 0;
+	 src_buffer = 1;
+	 src_offset = 0;
      }

      hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -119,8 +111,6 @@ draw_pt_emit_prepare(struct pt_emit *emit,
       translate_key_compare(&emit->translate->key, &hw_key) != 0) {
      translate_key_sanitize(&hw_key);
      emit->translate = translate_cache_find(emit->cache, &hw_key);
-
-      emit->translate->set_buffer(emit->translate, 2, &emit->zero4[0], 0, ~0);
   }

   if (!vinfo->size)
@@ -148,7 +138,7 @@ draw_pt_emit(struct pt_emit *emit,

   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
    */
-   draw_do_flush(draw, DRAW_FLUSH_BACKEND);
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   if (vertex_count == 0)
      return;
@@ -162,31 +152,31 @@ draw_pt_emit(struct pt_emit *emit,
                             (ushort)translate->key.output_stride,
                             (ushort)vertex_count);

-   hw_verts = render->map_vertices(render);
+   hw_verts = render->map_vertices( render );
   if (!hw_verts) {
      debug_warn_once("map of vertex buffer failed (out of memory?)");
      return;
   }

   translate->set_buffer(translate,
-                         0,
-                         vertex_data,
-                         stride,
-                         ~0);
+			 0,
+			 vertex_data,
+			 stride,
+			 ~0);

   translate->set_buffer(translate,
-                         1,
-                         &draw->rasterizer->point_size,
-                         0,
-                         ~0);
+			 1,
+			 &draw->rasterizer->point_size,
+			 0,
+			 ~0);

   /* fetch/translate vertex attribs to fill hw_verts[] */
   translate->run(translate,
-                  0,
-                  vertex_count,
-                  0,
-                  0,
-                  hw_verts);
+		  0,
+		  vertex_count,
+                  draw->start_instance,
+                  draw->instance_id,
+		  hw_verts );

   render->unmap_vertices(render, 0, vertex_count - 1);

@@ -222,7 +212,7 @@ draw_pt_emit_linear(struct pt_emit *emit,
 #endif
   /* XXX: need to flush to get prim_vbuf.c to release its allocation??
    */
-   draw_do_flush(draw, DRAW_FLUSH_BACKEND);
+   draw_do_flush( draw, DRAW_FLUSH_BACKEND );

   /* XXX: and work out some way to coordinate the render primitive
    * between vbuf.c and here...
@@ -234,35 +224,35 @@ draw_pt_emit_linear(struct pt_emit *emit,
                                  (ushort)count))
      goto fail;

-   hw_verts = render->map_vertices(render);
+   hw_verts = render->map_vertices( render );
   if (!hw_verts)
      goto fail;

   translate->set_buffer(translate, 0,
-                         vertex_data, stride, count - 1);
+			 vertex_data, stride, count - 1);

   translate->set_buffer(translate, 1,
-                         &draw->rasterizer->point_size,
-                         0, ~0);
+			 &draw->rasterizer->point_size,
+			 0, ~0);

   translate->run(translate,
                  0,
                  count,
-                  0,
-                  0,
+                  draw->start_instance,
+                  draw->instance_id,
                  hw_verts);

   if (0) {
      unsigned i;
      for (i = 0; i < count; i++) {
         debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
-         draw_dump_emitted_vertex(emit->vinfo,
-                                  (const uint8_t *)hw_verts +
-                                  translate->key.output_stride * i);
+         draw_dump_emitted_vertex( emit->vinfo,
+                                   (const uint8_t *)hw_verts +
+                                   translate->key.output_stride * i );
      }
   }

-   render->unmap_vertices(render, 0, count - 1);
+   render->unmap_vertices( render, 0, count - 1 );

   for (start = i = 0;
        i < prim_info->primitive_count;
@@ -272,7 +262,7 @@ draw_pt_emit_linear(struct pt_emit *emit,
                          start,
                          prim_info->primitive_lengths[i]);
   }
-
+   
   render->release_vertices(render);

   return;
@@ -297,8 +287,6 @@ draw_pt_emit_create(struct draw_context *draw)
      return NULL;
   }

-   emit->zero4[0] = emit->zero4[1] = emit->zero4[2] = emit->zero4[3] = 0.0f;
-
   return emit;
 }

--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -71,10 +71,12 @@ draw_pt_fetch_prepare(struct pt_fetch *fetch,

   fetch->vertex_size = vertex_size;

-   /* Leave the clipmask/edgeflags/pad/vertex_id,
-    * clip[] and whatever else in the header untouched.
+   /* Leave the clipmask/edgeflags/pad/vertex_id untouched
    */
-   dst_offset = offsetof(struct vertex_header, data);
+   dst_offset += 1 * sizeof(float);
+   /* Just leave the clip[] and pre_clip_pos[] arrays untouched.
+    */
+   dst_offset += 8 * sizeof(float);

   if (instance_id_index != ~0) {
      num_extra_inputs++;
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -376,7 +376,7 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle )
 struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw )
 {
   struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end );
-   if (!fetch_emit)
+   if (fetch_emit == NULL)
      return NULL;

   fetch_emit->cache = translate_cache_create();
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -453,7 +453,6 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
                               draw->vs.vertex_shader->info.writes_viewport_index)) {
         clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info );
      }
-      /* "clipped" also includes non-one edgeflag */
      if (clipped) {
         opt |= PT_PIPELINE;
      }
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -58,7 +58,7 @@ initialize_vertex_header(struct vertex_header *header)
 {
   header->clipmask = 0;
   header->edgeflag = 1;
-   header->pad = 0;
+   header->have_clipdist = 0;
   header->vertex_id = UNDEFINED_VERTEX_ID;
 }

--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -275,7 +275,7 @@ void draw_pt_so_emit( struct pt_so_emit *emit,
   emit->generated_primitives = 0;
   emit->input_vertex_stride = input_verts->stride;
   if (emit->use_pre_clip_pos)
-      emit->pre_clip_pos = input_verts->verts->clip_pos;
+      emit->pre_clip_pos = input_verts->verts->pre_clip_pos;

   emit->inputs = (const float (*)[4])input_verts->verts->data;

--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -44,7 +44,6 @@
 #include "util/u_debug.h"
 #include "util/u_memory.h"

-#define DRAW_ATTR_NONEXIST 255

 /**
 * Vertex attribute emit modes
@@ -61,6 +60,18 @@ enum attrib_emit {
 };


+/**
+ * Attribute interpolation mode
+ */
+enum interp_mode {
+   INTERP_NONE,      /**< never interpolate vertex header info */
+   INTERP_POS,       /**< special case for frag position */
+   INTERP_CONSTANT,
+   INTERP_LINEAR,
+   INTERP_PERSPECTIVE
+};
+
+
 /**
 * Information about hardware/rasterization vertex layout.
 */
@@ -74,7 +85,8 @@ struct vertex_info
    * memcmp() comparisons.
    */
   struct {
-      unsigned emit:8;             /**< EMIT_x */
+      unsigned interp_mode:4;      /**< INTERP_x */
+      unsigned emit:4;             /**< EMIT_x */
      unsigned src_index:8;          /**< map to post-xform attribs */
   } attrib[PIPE_MAX_SHADER_OUTPUTS];
 };
@@ -112,18 +124,20 @@ draw_vinfo_copy( struct vertex_info *dst,
 static inline uint
 draw_emit_vertex_attr(struct vertex_info *vinfo,
                      enum attrib_emit emit, 
+                      enum interp_mode interp, /* only used by softpipe??? */
                      int src_index)
 {
   const uint n = vinfo->num_attribs;

   /* If the src_index is negative, meaning it hasn't been found
-    * we'll assign it all zeros later - set to DRAW_ATTR_NONEXIST */
+    * let's just redirect it to the first output slot */
   if (src_index < 0) {
-      src_index = DRAW_ATTR_NONEXIST;
+      src_index = 0;
   }

   assert(n < Elements(vinfo->attrib));
   vinfo->attrib[n].emit = emit;
+   vinfo->attrib[n].interp_mode = interp;
   vinfo->attrib[n].src_index = src_index;
   vinfo->num_attribs++;
   return n;
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -200,7 +200,7 @@ draw_vs_lookup_variant( struct draw_vertex_shader *vs,
   /* Else have to create a new one: 
    */
   variant = vs->create_variant( vs, key );
-   if (!variant)
+   if (variant == NULL)
      return NULL;

   /* Add it to our list, could be smarter: 
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -225,7 +225,7 @@ draw_create_vs_exec(struct draw_context *draw,
 {
   struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );

-   if (!vs)
+   if (vs == NULL) 
      return NULL;

   /* we make a private copy of the tokens */
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -86,7 +86,7 @@ draw_create_vs_llvm(struct draw_context *draw,
 {
   struct llvm_vertex_shader *vs = CALLOC_STRUCT( llvm_vertex_shader );

-   if (!vs)
+   if (vs == NULL)
      return NULL;

   /* we make a private copy of the tokens */
--- a/src/gallium/auxiliary/draw/draw_vs_variant.c
+++ b/src/gallium/auxiliary/draw/draw_vs_variant.c
@@ -302,7 +302,7 @@ draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
   struct translate_key fetch, emit;

   struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic );
-   if (!vsvg)
+   if (vsvg == NULL)
      return NULL;

   vsvg->base.key = *key;
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -194,7 +194,7 @@ lp_build_min_simple(struct lp_build_context *bld,
      }
   }

-   if (intrinsic) {
+   if(intrinsic) {
      /* We need to handle nan's for floating point numbers. If one of the
       * inputs is nan the other should be returned (required by both D3D10+
       * and OpenCL).
@@ -376,7 +376,7 @@ lp_build_max_simple(struct lp_build_context *bld,
     }
   }

-   if (intrinsic) {
+   if(intrinsic) {
      if (util_cpu_caps.has_sse && type.floating &&
          nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN &&
@@ -518,7 +518,7 @@ lp_build_add(struct lp_build_context *bld,
         }
      }
   
-      if (intrinsic)
+      if(intrinsic)
         return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b);
   }

@@ -810,7 +810,7 @@ lp_build_sub(struct lp_build_context *bld,
         }
      }
   
-      if (intrinsic)
+      if(intrinsic)
         return lp_build_intrinsic_binary(builder, intrinsic, lp_build_vec_type(bld->gallivm, bld->type), a, b);
   }

@@ -3287,7 +3287,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
      logexp = LLVMBuildSIToFP(builder, logexp, vec_type, "");
   }

-   if (p_log2) {
+   if(p_log2) {
      /* mant = 1 + (float) mantissa(x) */
      mant = LLVMBuildAnd(builder, i, mantmask, "");
      mant = LLVMBuildOr(builder, mant, one, "");
@@ -3335,15 +3335,15 @@ lp_build_log2_approx(struct lp_build_context *bld,
      }
   }

-   if (p_exp) {
+   if(p_exp) {
      exp = LLVMBuildBitCast(builder, exp, vec_type, "");
      *p_exp = exp;
   }

-   if (p_floor_log2)
+   if(p_floor_log2)
      *p_floor_log2 = logexp;

-   if (p_log2)
+   if(p_log2)
      *p_log2 = res;
 }

--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -373,7 +373,7 @@ lp_build_const_aos(struct gallivm_state *gallivm,

   lp_build_elem_type(gallivm, type);

-   if (!swizzle)
+   if(swizzle == NULL)
      swizzle = default_swizzle;

   elems[swizzle[0]] = lp_build_const_elem(gallivm, type, r);
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -130,7 +130,6 @@ lp_build_half_to_float(struct gallivm_state *gallivm,
 *
 * Convert float32 to half floats, preserving Infs and NaNs,
 * with rounding towards zero (trunc).
- * XXX: For GL, would prefer rounding towards nearest(-even).
 */
 LLVMValueRef
 lp_build_float_to_half(struct gallivm_state *gallivm,
@@ -144,15 +143,6 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
   struct lp_type i16_type = lp_type_int_vec(16, 16 * length);
   LLVMValueRef result;

-   /*
-    * Note: Newer llvm versions (3.6 or so) support fptrunc to 16 bits
-    * directly, without any (x86 or generic) intrinsics.
-    * Albeit the rounding mode cannot be specified (and is undefined,
-    * though in practice on x86 seems to do nearest-even but it may
-    * be dependent on instruction set support), so is essentially
-    * useless.
-    */
-
   if (util_cpu_caps.has_f16c &&
       (length == 4 || length == 8)) {
      struct lp_type i168_type = lp_type_int_vec(16, 16 * 8);
@@ -197,11 +187,7 @@ lp_build_float_to_half(struct gallivm_state *gallivm,
        LLVMValueRef index = LLVMConstInt(i32t, i, 0);
        LLVMValueRef f32 = LLVMBuildExtractElement(builder, src, index, "");
 #if 0
-        /*
-         * XXX: not really supported by backends.
-         * Even if they would now, rounding mode cannot be specified and
-         * is undefined.
-         */
+        /* XXX: not really supported by backends */
        LLVMValueRef f16 = lp_build_intrinsic_unary(builder, "llvm.convert.to.fp16", i16t, f32);
 #else
        LLVMValueRef f16 = LLVMBuildCall(builder, func, &f32, 1, "");
@@ -472,7 +458,7 @@ int lp_build_conv_auto(struct gallivm_state *gallivm,
   {
      /* Special case 4x4f --> 1x16ub */
      if (src_type.length == 4 &&
-            (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
+          util_cpu_caps.has_sse2)
      {
         num_dsts = (num_srcs + 3) / 4;
         dst_type->length = num_srcs * 4 >= 16 ? 16 : num_srcs * 4;
@@ -559,7 +545,7 @@ lp_build_conv(struct gallivm_state *gallivm,
       ((dst_type.length == 16 && 4 * num_dsts == num_srcs) ||
        (num_dsts == 1 && dst_type.length * num_srcs == 16 && num_srcs != 3)) &&

-       (util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec))
+       util_cpu_caps.has_sse2)
   {
      struct lp_build_context bld;
      struct lp_type int16_type, int32_type;
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -26,9 +26,6 @@
 **************************************************************************/

 #include <stddef.h>
-#include <fstream>
-#include <sstream>
-#include <iomanip>

 #include <llvm-c/Core.h>
 #include <llvm-c/Disassembler.h>
@@ -128,7 +125,7 @@ lp_debug_dump_value(LLVMValueRef value)
 * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
 */
 static size_t
-disassemble(const void* func, std::stringstream &buffer)
+disassemble(const void* func)
 {
   const uint8_t *bytes = (const uint8_t *)func;

@@ -146,8 +143,8 @@ disassemble(const void* func, std::stringstream &buffer)
   char outline[1024];

   if (!D) {
-      buffer << "error: could not create disassembler for triple "
-             << Triple.c_str() << '\n';
+      _debug_printf("error: couldn't create disassembler for triple %s\n",
+                    Triple.c_str());
      return 0;
   }

@@ -161,13 +158,13 @@ disassemble(const void* func, std::stringstream &buffer)
       * so that between runs.
       */

-      buffer << std::setw(6) << (unsigned long)pc << ":\t";
+      _debug_printf("%6lu:\t", (unsigned long)pc);

      Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
                                   sizeof outline);

      if (!Size) {
-         buffer << "invalid\n";
+         _debug_printf("invalid\n");
         pc += 1;
         break;
      }
@@ -179,11 +176,10 @@ disassemble(const void* func, std::stringstream &buffer)
      if (0) {
         unsigned i;
         for (i = 0; i < Size; ++i) {
-            buffer << std::hex << std::setfill('0') << std::setw(2)
-                   << static_cast<int> (bytes[pc + i]);
+            _debug_printf("%02x ", bytes[pc + i]);
         }
         for (; i < 16; ++i) {
-            buffer << std::dec << "   ";
+            _debug_printf("   ");
         }
      }

@@ -191,7 +187,9 @@ disassemble(const void* func, std::stringstream &buffer)
       * Print the instruction.
       */

-      buffer << std::setw(Size) << outline << '\n';
+      _debug_printf("%*s", Size, outline);
+
+      _debug_printf("\n");

      /*
       * Stop disassembling on return statements, if there is no record of a
@@ -200,11 +198,9 @@ disassemble(const void* func, std::stringstream &buffer)
       * XXX: This currently assumes x86
       */

-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
      if (Size == 1 && bytes[pc] == 0xc3) {
         break;
      }
-#endif

      /*
       * Advance.
@@ -213,12 +209,12 @@ disassemble(const void* func, std::stringstream &buffer)
      pc += Size;

      if (pc >= extent) {
-         buffer << "disassembly larger than " << extent << " bytes, aborting\n";
+         _debug_printf("disassembly larger than %ull bytes, aborting\n", extent);
         break;
      }
   }

-   buffer << '\n';
+   _debug_printf("\n");

   LLVMDisasmDispose(D);

@@ -226,8 +222,7 @@ disassemble(const void* func, std::stringstream &buffer)
    * Print GDB command, useful to verify output.
    */
   if (0) {
-      buffer << "disassemble " << static_cast<const void*>(bytes) << ' '
-             << static_cast<const void*>(bytes + pc) << '\n';
+      _debug_printf("disassemble %p %p\n", bytes, bytes + pc);
   }

   return pc;
@@ -236,14 +231,8 @@ disassemble(const void* func, std::stringstream &buffer)

 extern "C" void
 lp_disassemble(LLVMValueRef func, const void *code) {
-   std::stringstream buffer;
-   std::string s;
-
-   buffer << LLVMGetValueName(func) << ":\n";
-   disassemble(code, buffer);
-   s = buffer.str();
-   _debug_printf("%s", s.c_str());
-   _debug_printf("\n");
+   _debug_printf("%s:\n", LLVMGetValueName(func));
+   disassemble(code);
 }


@@ -259,10 +248,9 @@ extern "C" void
 lp_profile(LLVMValueRef func, const void *code)
 {
 #if defined(__linux__) && defined(PROFILE)
-   std::stringstream buffer;
-   static std::ofstream perf_asm_file;
   static boolean first_time = TRUE;
   static FILE *perf_map_file = NULL;
+   static int perf_asm_fd = -1;
   if (first_time) {
      /*
       * We rely on the disassembler for determining a function's size, but
@@ -276,16 +264,17 @@ lp_profile(LLVMValueRef func, const void *code)
         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
         perf_map_file = fopen(filename, "wt");
         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
-         perf_asm_file.open(filename);
+         mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+         perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode);
      }
      first_time = FALSE;
   }
   if (perf_map_file) {
      const char *symbol = LLVMGetValueName(func);
      unsigned long addr = (uintptr_t)code;
-      buffer << symbol << ":\n";
-      unsigned long size = disassemble(code, buffer);
-      perf_asm_file << buffer.rdbuf() << std::flush;
+      llvm::raw_fd_ostream Out(perf_asm_fd, false);
+      Out << symbol << ":\n";
+      unsigned long size = disassemble(code);
      fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
      fflush(perf_map_file);
   }
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -118,10 +118,8 @@ create_pass_manager(struct gallivm_state *gallivm)
    * simple, or constant propagation into them, etc.
    */

-#if HAVE_LLVM < 0x0309
   // Old versions of LLVM get the DataLayout from the pass manager.
   LLVMAddTargetData(gallivm->target, gallivm->passmgr);
-#endif

   /* Setting the module's DataLayout to an empty string will cause the
    * ExecutionEngine to copy to the DataLayout string from its target
--- a/Show More
+++ b/Show More