android: enlarge cache_resources

android: get rid of HAL pixelformats 5551 and 4444
android: set driver name to debug.mesa.driver
2014-06-05 10:51:43 +08:00 · 2014-01-23 09:24:32 +08:00 · 2014-01-23 09:21:39 +08:00 · 2013-07-24 01:16:55 +08:00 · 2013-07-24 01:16:54 +08:00 · 2013-07-24 01:16:52 +08:00
840 changed files with 38835 additions and 56783 deletions
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -35,7 +35,7 @@ LOCAL_C_INCLUDES += \

 # define ANDROID_VERSION (e.g., 4.0.x => 0x0400)
 LOCAL_CFLAGS += \
-	-DPACKAGE_VERSION=\"9.2.0-rc2\" \
+	-DPACKAGE_VERSION=\"9.2.0-devel\" \
 	-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\" \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

--- a/CleanSpec.mk
+++ b/CleanSpec.mk
@@ -0,0 +1,49 @@
+# Copyright (C) 2013 The Android-x86 Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# If you don't need to do a full clean build but would like to touch
+# a file or delete some intermediate files, add a clean step to the end
+# of the list.  These steps will only be run once, if they haven't been
+# run before.
+#
+# E.g.:
+#     $(call add-clean-step, touch -c external/sqlite/sqlite3.h)
+#     $(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libz_intermediates)
+#
+# Always use "touch -c" and "rm -f" or "rm -rf" to gracefully deal with
+# files that are missing or have been moved.
+#
+# Use $(PRODUCT_OUT) to get to the "out/target/product/blah/" directory.
+# Use $(OUT_DIR) to refer to the "out" directory.
+#
+# If you need to re-do something that's already mentioned, just copy
+# the command and add it to the bottom of the list.  E.g., if a change
+# that you made last week required touching a file and a change you
+# made today requires touching the same file, just copy the old
+# touch step and add it to the end of the list.
+#
+# ************************************************
+# NEWER CLEAN STEPS MUST BE AT THE END OF THE LIST
+# ************************************************
+
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/STATIC_LIBRARIES/libmesa_*_intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libdrm_*intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/i9?5_dri_intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libglapi_intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/gralloc.drm_intermediates)
+$(call add-clean-step, rm -rf $(PRODUCT_OUT)/obj/SHARED_LIBRARIES/libgralloc_drm_intermediates)
+$(call add-clean-step, rm -rf $(OUT_DIR)/host/$(HOST_OS)-$(HOST_ARCH)/obj/EXECUTABLES/mesa_*_intermediates)
+$(call add-clean-step, rm -rf $(OUT_DIR)/host/$(HOST_OS)-$(HOST_ARCH)/obj/EXECUTABLES/glsl_compiler_intermediates)
+$(call add-clean-step, rm -rf $(OUT_DIR)/host/$(HOST_OS)-$(HOST_ARCH)/obj/STATIC_LIBRARIES/libmesa_glsl_utils_intermediates)
--- a/Makefile.am
+++ b/Makefile.am
@@ -57,6 +57,12 @@ EXTRA_FILES = \
 	src/glsl/glcpp/glcpp-lex.c			\
 	src/glsl/glcpp/glcpp-parse.c			\
 	src/glsl/glcpp/glcpp-parse.h			\
+	src/mesa/main/api_exec_es1.c			\
+	src/mesa/main/api_exec_es1_dispatch.h		\
+	src/mesa/main/api_exec_es1_remap_helper.h	\
+	src/mesa/main/api_exec_es2.c			\
+	src/mesa/main/api_exec_es2_dispatch.h		\
+	src/mesa/main/api_exec_es2_remap_helper.h	\
 	src/mesa/program/lex.yy.c			\
 	src/mesa/program/program_parse.tab.c		\
 	src/mesa/program/program_parse.tab.h		\
--- a/SConstruct
+++ b/SConstruct
@@ -70,7 +70,7 @@ if env['gles']:
 # Environment setup

 env.Append(CPPDEFINES = [
-    ('PACKAGE_VERSION', '\\"9.2.0-rc2\\"'),
+    ('PACKAGE_VERSION', '\\"9.2.0-devel\\"'),
    ('PACKAGE_BUGREPORT', '\\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\\"'),
 ])

--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^[[:space:]]*NOTE: .*[Cc]andidate' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -6,7 +6,7 @@ dnl Tell the user about autoconf.html in the --help output
 m4_divert_once([HELP_END], [
 See docs/autoconf.html for more details on the options for Mesa.])

-AC_INIT([Mesa], [9.2.0-rc2],
+AC_INIT([Mesa], [9.2.0-devel],
    [https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa])
 AC_CONFIG_AUX_DIR([bin])
 AC_CONFIG_MACRO_DIR([m4])
@@ -31,7 +31,7 @@ AC_SUBST([OSMESA_VERSION])

 dnl Versions for external dependencies
 LIBDRM_REQUIRED=2.4.24
-LIBDRM_RADEON_REQUIRED=2.4.46
+LIBDRM_RADEON_REQUIRED=2.4.45
 LIBDRM_INTEL_REQUIRED=2.4.38
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
@@ -100,7 +100,6 @@ AC_MSG_RESULT([$acv_mesa_CLANG])

 dnl If we're using GCC, make sure that it is at least version 3.3.0.  Older
 dnl versions are explictly not supported.
-GEN_ASM_OFFSETS=no
 if test "x$GCC" = xyes -a "x$acv_mesa_CLANG" = xno; then
    AC_MSG_CHECKING([whether gcc version is sufficient])
    major=0
@@ -118,12 +117,7 @@ if test "x$GCC" = xyes -a "x$acv_mesa_CLANG" = xno; then
    else
        AC_MSG_RESULT([yes])
    fi
-
-    if test "x$cross_compiling" = xyes; then
-        GEN_ASM_OFFSETS=yes
-    fi
 fi
-AM_CONDITIONAL([GEN_ASM_OFFSETS], test "x$GEN_ASM_OFFSETS" = xyes)

 dnl Make sure the pkg-config macros are defined
 m4_ifndef([PKG_PROG_PKG_CONFIG],
@@ -444,7 +438,7 @@ test "x$enable_asm" = xno && AC_MSG_RESULT([no])
 # disable if cross compiling on x86/x86_64 since we must run gen_matypes
 if test "x$enable_asm" = xyes && test "x$cross_compiling" = xyes; then
    case "$host_cpu" in
-    i?86 | x86_64 | amd64)
+    i?86 | x86_64)
        enable_asm=no
        AC_MSG_RESULT([no, cross compiling])
        ;;
@@ -455,7 +449,7 @@ if test "x$enable_asm" = xyes; then
    case "$host_cpu" in
    i?86)
        case "$host_os" in
-        linux* | *freebsd* | dragonfly* | *netbsd* | openbsd*)
+        linux* | *freebsd* | dragonfly* | *netbsd*)
            test "x$enable_64bit" = xyes && asm_arch=x86_64 || asm_arch=x86
            ;;
        gnu*)
@@ -463,9 +457,9 @@ if test "x$enable_asm" = xyes; then
            ;;
        esac
        ;;
-    x86_64|amd64)
+    x86_64)
        case "$host_os" in
-        linux* | *freebsd* | dragonfly* | *netbsd* | openbsd*)
+        linux* | *freebsd* | dragonfly* | *netbsd*)
            test "x$enable_32bit" = xyes && asm_arch=x86 || asm_arch=x86_64
            ;;
        esac
@@ -484,7 +478,7 @@ if test "x$enable_asm" = xyes; then
        DEFINES="$DEFINES -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM"
        AC_MSG_RESULT([yes, x86])
        ;;
-    x86_64|amd64)
+    x86_64)
        DEFINES="$DEFINES -DUSE_X86_64_ASM"
        AC_MSG_RESULT([yes, x86_64])
        ;;
@@ -579,11 +573,6 @@ AC_ARG_ENABLE([osmesa],
        [enable OSMesa library @<:@default=disabled@:>@])],
    [enable_osmesa="$enableval"],
    [enable_osmesa=no])
-AC_ARG_ENABLE([gallium-osmesa],
-    [AS_HELP_STRING([--enable-gallium-osmesa],
-        [enable Gallium implementation of the OSMesa library @<:@default=disabled@:>@])],
-    [enable_gallium_osmesa="$enableval"],
-    [enable_gallium_osmesa=no])
 AC_ARG_ENABLE([egl],
    [AS_HELP_STRING([--disable-egl],
        [disable EGL library @<:@default=enabled@:>@])],
@@ -774,13 +763,7 @@ if test "x$enable_dri" = xyes; then
    GALLIUM_STATE_TRACKERS_DIRS="dri $GALLIUM_STATE_TRACKERS_DIRS"
 fi

-if test "x$enable_gallium_osmesa" = xyes; then
-    if test -z "$with_gallium_drivers"; then
-        AC_MSG_ERROR([Cannot enable gallium_osmesa without Gallium])
-    fi
-    if test "x$enable_osmesa" = xyes; then
-        AC_MSG_ERROR([Cannot enable both classic and Gallium OSMesa implementations])
-    fi
+if test "x$enable_osmesa" = xyes; then
    GALLIUM_STATE_TRACKERS_DIRS="osmesa $GALLIUM_STATE_TRACKERS_DIRS"
    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS osmesa"
 fi
@@ -983,7 +966,7 @@ if test "x$enable_dri" = xyes; then
        DEFINES="$DEFINES -DHAVE_ALIAS"

        case "$host_cpu" in
-        x86_64|amd64)
+        x86_64)
            if test "x$DRI_DIRS" = "xyes"; then
                DRI_DIRS="i915 i965 nouveau r200 radeon swrast"
            fi
@@ -1002,7 +985,7 @@ if test "x$enable_dri" = xyes; then
            ;;
        esac
        ;;
-    freebsd* | dragonfly* | *netbsd* | openbsd*)
+    freebsd* | dragonfly* | *netbsd*)
        DEFINES="$DEFINES -DHAVE_PTHREAD -DUSE_EXTERNAL_DXTN_LIB=1"
        DEFINES="$DEFINES -DHAVE_ALIAS"

@@ -1146,7 +1129,7 @@ x16|x32)
    ;;
 esac

-if test "x$enable_osmesa" = xyes -o "x$enable_gallium_osmesa" = xyes; then
+if test "x$enable_osmesa" = xyes; then
    # only link libraries with osmesa if shared
    if test "$enable_static" = no; then
        OSMESA_LIB_DEPS="-lm $PTHREAD_LIBS $SELINUX_LIBS $DLOPEN_LIBS"
@@ -1507,13 +1490,6 @@ AC_SUBST([EGL_NATIVE_PLATFORM])
 AC_SUBST([EGL_PLATFORMS])
 AC_SUBST([EGL_CFLAGS])

-# If we don't have the X11 platform, set this define so we don't try to include
-# the X11 headers.
-if ! echo "$egl_platforms" | grep -q 'x11'; then
-    DEFINES="$DEFINES -DMESA_EGL_NO_X11_HEADERS"
-    GL_PC_CFLAGS="$GL_PC_CFLAGS -DMESA_EGL_NO_X11_HEADERS"
-fi
-
 AC_ARG_WITH([egl-driver-dir],
    [AS_HELP_STRING([--with-egl-driver-dir=DIR],
                    [directory for EGL drivers [[default=${libdir}/egl]]])],
@@ -1590,7 +1566,7 @@ if test "x$with_gallium_drivers" = x; then
 fi
 if test "x$enable_gallium_llvm" = xauto; then
    case "$host_cpu" in
-    i*86|x86_64|amd64) enable_gallium_llvm=yes;;
+    i*86|x86_64) enable_gallium_llvm=yes;;
    esac
 fi
 if test "x$enable_gallium_llvm" = xyes; then
@@ -1601,53 +1577,42 @@ if test "x$enable_gallium_llvm" = xyes; then
    fi

    if test "x$LLVM_CONFIG" != xno; then
-        LLVM_VERSION=`$LLVM_CONFIG --version | sed 's/svn.*//g'`
-        LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
-        LLVM_BINDIR=`$LLVM_CONFIG --bindir`
-        LLVM_CPPFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cppflags"`
-        LLVM_CFLAGS=$LLVM_CPPFLAGS   # CPPFLAGS seem to be sufficient
-        LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"`
-        LLVM_INCLUDEDIR=`$LLVM_CONFIG --includedir`
-        LLVM_LIBDIR=`$LLVM_CONFIG --libdir`
-
-        AC_COMPUTE_INT([LLVM_VERSION_MAJOR], [LLVM_VERSION_MAJOR],
-            [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
-        AC_COMPUTE_INT([LLVM_VERSION_MINOR], [LLVM_VERSION_MINOR],
-            [#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
-
-        if test "x${LLVM_VERSION_MAJOR}" != x; then
-            LLVM_VERSION_INT="${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}"
-        else
-            LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'`
-        fi
-
+	LLVM_VERSION=`$LLVM_CONFIG --version | sed 's/svn.*//g'`
+	LLVM_VERSION_INT=`echo $LLVM_VERSION | sed -e 's/\([[0-9]]\)\.\([[0-9]]\)/\10\2/g'`
        LLVM_COMPONENTS="engine bitwriter"
-        if $LLVM_CONFIG --components | grep -qw 'mcjit'; then
+        if $LLVM_CONFIG --components | grep -q '\<mcjit\>'; then
            LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
        fi

        if test "x$enable_opencl" = xyes; then
            LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
            # LLVM 3.3 >= 177971 requires IRReader
-            if $LLVM_CONFIG --components | grep -qw 'irreader'; then
+            if $LLVM_CONFIG --components | grep -q '\<irreader\>'; then
                LLVM_COMPONENTS="${LLVM_COMPONENTS} irreader"
            fi
        fi
-        DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT"
-        MESA_LLVM=1
+	LLVM_LDFLAGS=`$LLVM_CONFIG --ldflags`
+	LLVM_BINDIR=`$LLVM_CONFIG --bindir`
+	LLVM_CPPFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cppflags"`
+	LLVM_CFLAGS=$LLVM_CPPFLAGS   # CPPFLAGS seem to be sufficient
+	LLVM_CXXFLAGS=`strip_unwanted_llvm_flags "$LLVM_CONFIG --cxxflags"`
+	LLVM_INCLUDEDIR=`$LLVM_CONFIG --includedir`
+	LLVM_LIBDIR=`$LLVM_CONFIG --libdir`
+	DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT"
+	MESA_LLVM=1

-        dnl Check for Clang internal headers
+	dnl Check for Clang interanl headers
        if test "x$enable_opencl" = xyes; then
            if test "x$CLANG_LIBDIR" = x; then
                CLANG_LIBDIR=${LLVM_LIBDIR}
            fi
            CLANG_RESOURCE_DIR=$CLANG_LIBDIR/clang/${LLVM_VERSION}
-            AS_IF([test ! -f "$CLANG_RESOURCE_DIR/include/stddef.h"],
-                [AC_MSG_ERROR([Could not find clang internal header stddef.h in $CLANG_RESOURCE_DIR Use --with-clang-libdir to specify the correct path to the clang libraries.])])
+            AC_CHECK_FILE("$CLANG_RESOURCE_DIR/include/stddef.h",,
+                AC_MSG_ERROR([Could not find clang internal header stddef.h in $CLANG_RESOURCE_DIR Use --with-clang-libdir to specify the correct path to the clang libraries.]))
        fi
    else
-        MESA_LLVM=0
-        LLVM_VERSION_INT=0
+	MESA_LLVM=0
+	LLVM_VERSION_INT=0
    fi
 else
    MESA_LLVM=0
@@ -1722,7 +1687,7 @@ gallium_check_st() {
 gallium_require_llvm() {
    if test "x$MESA_LLVM" = x0; then
        case "$host_cpu" in
-        i*86|x86_64|amd64) AC_MSG_ERROR([LLVM is required to build $1 on x86 and x86_64]);;
+        i*86|x86_64) AC_MSG_ERROR([LLVM is required to build $1 on x86 and x86_64]);;
        esac
    fi
 }
@@ -1744,7 +1709,7 @@ radeon_llvm_check() {
    if test "$LLVM_VERSION_INT" -lt "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}"; then
        AC_MSG_ERROR([LLVM $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is required for r600g and radeonsi.])
    fi
-    if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then
+    if test true && $LLVM_CONFIG --targets-built | grep -qv '\<R600\>' ; then
        AC_MSG_ERROR([LLVM R600 Target not enabled.  You can enable it when building the LLVM
                      sources with the --enable-experimental-targets=R600
                      configure flag])
@@ -1881,7 +1846,7 @@ if test "x$MESA_LLVM" != x0; then
    if test "x$with_llvm_shared_libs" = xyes; then
        dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
        LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+        AC_CHECK_FILE("$LLVM_LIBDIR/lib$LLVM_SO_NAME.so", llvm_have_one_so=yes,)

        if test "x$llvm_have_one_so" = xyes; then
            dnl LLVM was built using auto*, so there is only one shared object.
@@ -1889,8 +1854,8 @@ if test "x$MESA_LLVM" != x0; then
        else
            dnl If LLVM was built with CMake, there will be one shared object per
            dnl component.
-            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
-                    [AC_MSG_ERROR([Could not find llvm shared libraries:
+            AC_CHECK_FILE("$LLVM_LIBDIR/libLLVMTarget.so",,
+                    AC_MSG_ERROR([Could not find llvm shared libraries:
 	Please make sure you have built llvm with the --enable-shared option
 	and that your llvm libraries are installed in $LLVM_LIBDIR
 	If you have installed your llvm libraries to a different directory you
@@ -1901,7 +1866,7 @@ if test "x$MESA_LLVM" != x0; then
 		--enable-opencl
 	If you do not want to build with llvm shared libraries and instead want to
 	use llvm static libraries then remove these options from your configure
-	invocation and reconfigure.])])
+	invocation and reconfigure.]))

           dnl We don't need to update LLVM_LIBS in this case because the LLVM
           dnl install uses a shared object for each compoenent and we have
@@ -1973,11 +1938,9 @@ AC_SUBST([ELF_LIB])

 AM_CONDITIONAL(NEED_LIBPROGRAM, test "x$with_gallium_drivers" != x -o \
                                     "x$enable_xlib_glx" = xyes -o \
-                                     "x$enable_osmesa" = xyes -o \
-                                     "x$enable_gallium_osmesa" = xyes)
+                                     "x$enable_osmesa" = xyes)
 AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
 AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
-AM_CONDITIONAL(HAVE_GALLIUM_OSMESA, test "x$enable_gallium_osmesa" = xyes)

 AM_CONDITIONAL(HAVE_X86_ASM, echo "$DEFINES" | grep 'X86_ASM' >/dev/null 2>&1)
 AM_CONDITIONAL(HAVE_X86_64_ASM, echo "$DEFINES" | grep 'X86_64_ASM' >/dev/null 2>&1)
@@ -2164,17 +2127,11 @@ echo "        OpenVG:          $enable_openvg"

 dnl Driver info
 echo ""
-case "x$enable_osmesa$enable_gallium_osmesa" in
-xnoyes)
-        echo "        OSMesa:          lib$OSMESA_LIB (Gallium)"
-        ;;
-xyesno)
+if test "x$enable_osmesa" != xno; then
        echo "        OSMesa:          lib$OSMESA_LIB"
-        ;;
-xnono)
+else
        echo "        OSMesa:          no"
-        ;;
-esac
+fi

 if test "x$enable_dri" != xno; then
        # cleanup the drivers var
--- a/docs/README.WIN32
+++ b/docs/README.WIN32
@@ -1,6 +1,6 @@
 File: docs/README.WIN32

-Last updated: 21 June 2013
+Last updated: 23 April 2011


 Quick Start
@@ -30,23 +30,6 @@ At this time, only the gallium GDI driver is known to work.
 Source code also exists in the tree for other drivers in
 src/mesa/drivers/windows, but the status of this code is unknown.

-Recipe
------
-
-Building on windows requires several open-source packages. These are
-steps that work as of this writing.
-
-1) install python 2.7
-2) install scons (latest)
-3) install mingw, flex, and bison
-4) install libxml2 from here: http://www.lfd.uci.edu/~gohlke/pythonlibs
-  get libxml2-python-2.9.1.win-amd64-py2.7.exe
-5) install pywin32 from here: http://www.lfd.uci.edu/~gohlke/pythonlibs
-  get pywin32-218.4.win-amd64-py2.7.exe
-6) install git
-7) download mesa from git
-  see http://www.mesa3d.org/repository.html
-8) run scons

 General
 -------
--- a/docs/extensions.html
+++ b/docs/extensions.html
@@ -32,7 +32,7 @@ The specifications follow.
 <li><a href="specs/MESA_pixmap_colormap.spec">MESA_pixmap_colormap.spec</a>
 <li><a href="specs/OLD/MESA_program_debug.spec">MESA_program_debug.spec</a> (obsolete)
 <li><a href="specs/MESA_release_buffers.spec">MESA_release_buffers.spec</a>
-<li><a href="specs/OLD/MESA_resize_buffers.spec">MESA_resize_buffers.spec</a> (obsolete)
+<li><a href="specs/MESA_resize_buffers.spec">MESA_resize_buffers.spec</a>
 <li><a href="specs/MESA_set_3dfx_mode.spec">MESA_set_3dfx_mode.spec</a>
 <li><a href="specs/MESA_shader_debug.spec">MESA_shader_debug.spec</a>
 <li><a href="specs/OLD/MESA_sprite_point.spec">MESA_sprite_point.spec</a> (obsolete)
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,24 +16,6 @@

 <h1>News</h1>

-<h2>August 1, 2013</h2>
-<p>
-<a href="relnotes/9.1.6.html">Mesa 9.1.6</a> is released.
-This is a bug fix release.
-</p>
-
-<h2>July 17, 2013</h2>
-<p>
-<a href="relnotes/9.1.5.html">Mesa 9.1.5</a> is released.
-This is a bug fix release.
-</p>
-
-<h2>July 1, 2013</h2>
-<p>
-<a href="relnotes/9.1.4.html">Mesa 9.1.4</a> is released.
-This is a bug fix release.
-</p>
-
 <h2>May 21, 2013</h2>
 <p>
 <a href="relnotes/9.1.3.html">Mesa 9.1.3</a> is released.
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -22,9 +22,6 @@ The release notes summarize what's new or changed in each Mesa release.

 <ul>
 <li><a href="relnotes/9.2.html">9.2 release notes</a>
-<li><a href="relnotes/9.1.6.html">9.1.6 release notes</a>
-<li><a href="relnotes/9.1.5.html">9.1.5 release notes</a>
-<li><a href="relnotes/9.1.4.html">9.1.4 release notes</a>
 <li><a href="relnotes/9.1.3.html">9.1.3 release notes</a>
 <li><a href="relnotes/9.1.2.html">9.1.2 release notes</a>
 <li><a href="relnotes/9.1.1.html">9.1.1 release notes</a>
--- a/docs/relnotes/9.1.4.html
+++ b/docs/relnotes/9.1.4.html
@@ -1,321 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 9.1.4 Release Notes / July 1st, 2013</h1>
-
-<p>
-Mesa 9.1.4 is a bug fix release which fixes bugs found since the 9.1.3 release.
-</p>
-<p>
-Mesa 9.1 implements the OpenGL 3.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 3.1.  OpenGL
-3.1 is <strong>only</strong> available if requested at context creation
-because GL_ARB_compatibility is not supported.
-</p>
-
-<h2>MD5 checksums</h2>
-<pre>
-a2c4e25d0e27918bc67f61bae04d0cb8  MesaLib-9.1.4.tar.bz2
-8c7e9ce5b05cb2223f0587396dd9dc08  MesaLib-9.1.4.tar.gz
-020459c5793d4279bdcb2daa1f7dd9f6  MesaLib-9.1.4.zip
-</pre>
-
-<h2>New features</h2>
-<p>None.</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=37871">Bug 37871</a> - [bisected i965] Bus error (core dumped) on oglc texdecaltile</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=42182">Bug 42182</a> - egl/opengles1/tri_x11 renders wrong</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=44958">Bug 44958</a> - [SNB IVB HSW] mesa demo test texleak bus error</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=53494">Bug 53494</a> - [snb] crash in texsubimage to a large atlas in clutter</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=60518">Bug 60518</a> - glDrawElements segfault when compiled into display list</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61821">Bug 61821</a> - src/mesa/drivers/dri/common/xmlpool.h:96:29: fatal error: xmlpool/options.h</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63520">Bug 63520</a> - r300g regression (RV380): Strange rendering of light sources in Penumbra  (bisected)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63701">Bug 63701</a> - [HSW] support new haswell graphics [8086:0a2e]</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64727">Bug 64727</a> - [gm45, bisected] some piglit glsl 1.10 built-in-functions tests crash</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64745">Bug 64745</a> - [llvmpipe] SIGSEGV src/gallium/state_trackers/glx/xlib/glx_api.c:1374</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64934">Bug 64934</a> - [llvmpipe] SIGSEGV src/gallium/state_trackers/glx/xlib/glx_api.c:1363</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65173">Bug 65173</a> - segfault in _mesa_get_format_datatype and _mesa_get_color_read_type when state dumping with glretrace</li>
-
-</ul>
-
-<h2>Changes</h2>
-<p>The full set of changes can be viewed by using the following GIT command:</p>
-
-<pre>
-  git log mesa-9.1.3..mesa-9.1.4
-</pre>
-
-<p>Alan Coopersmith (2):</p>
-<ul>
-  <li>integer overflow in XF86DRIOpenConnection() [CVE-2013-1993 1/2]</li>
-  <li>integer overflow in XF86DRIGetClientDriverName() [CVE-2013-1993 2/2]</li>
-</ul>
-
-<p>Alex Deucher (3):</p>
-<ul>
-  <li>radeonsi: add support for hainan chips</li>
-  <li>radeonsi: add Hainan pci ids</li>
-  <li>winsys/radeon: add env var to disable VM on Cayman/Trinity</li>
-</ul>
-pp
-<p>Andreas Boll (1):</p>
-<ul>
-  <li>glapi: Add some missing static_dispatch="false" annotations to es_EXT.xml</li>
-</ul>
-
-<p>Anuj Phogat (1):</p>
-<ul>
-  <li>intel: Add a null pointer check before dereferencing the pointer</li>
-</ul>
-
-<p>Armin K (1):</p>
-<ul>
-  <li>gallivm: Fix build with LLVM 3.3</li>
-</ul>
-
-<p>Brian Paul (9):</p>
-<ul>
-  <li>mesa: fix the compressed TexSubImage size checking code</li>
-  <li>st/mesa: generate GL_OUT_OF_MEMORY if we can't create the index buffer</li>
-  <li>mesa: fix error checking of DXT sRGB formats in _mesa_base_tex_format()</li>
-  <li>st/glx/xlib: check for null ctx pointer in glXIsDirect()</li>
-  <li>xlib: check for null ctx pointer in glXIsDirect()</li>
-  <li>st/glx: add null ctx check in glXDestroyContext()</li>
-  <li>xlib: add null ctx check in glXDestroyContext()</li>
-  <li>meta: move vertex array enables for mipmap generation</li>
-  <li>mesa: handle missing read buffer in _mesa_get_color_read_format/type()</li>
-</ul>
-
-<p>Bryan Cain (1):</p>
-<ul>
-  <li>nv50: initialize kick_notify callback in nv50_create</li>
-</ul>
-
-<p>Chad Versace (3):</p>
-<ul>
-  <li>egl/android: Fix error condition for EGL_ANDROID_image_native_buffer</li>
-  <li>i965: Fix glColorPointer(GL_FIXED)</li>
-  <li>intel: Return early if miptree allocation fails</li>
-</ul>
-
-<p>Chia-I Wu (1):</p>
-<ul>
-  <li>u_vbuf: fix index buffer leak</li>
-</ul>
-
-<p>Chris Forbes (8):</p>
-<ul>
-  <li>mesa: add accessor for effective stencil ref</li>
-  <li>intel: Use accessor for stencil reference values</li>
-  <li>nouveau: Use accessor for stencil reference values</li>
-  <li>radeon: Use accessor for stencil reference values</li>
-  <li>st: Use accessor for stencil reference values</li>
-  <li>swrast: Use accessor for stencil reference values</li>
-  <li>mesa: Stop clamping stencil reference value at specification time</li>
-  <li>mesa: Use accessor for stencil reference values in glGet</li>
-</ul>
-
-<p>Chí-Thanh Christopher Nguyễn (1):</p>
-<ul>
-  <li>targets/dri-i915: Force c++ linker in all cases</li>
-</ul>
-
-<p>Daniel Martin (1):</p>
-<ul>
-  <li>Fix build of swrast only without libdrm</li>
-</ul>
-
-<p>Dave Airlie (1):</p>
-<ul>
-  <li>i965: fix problem with constant out of bounds access (v3)</li>
-</ul>
-
-<p>Eric Anholt (10):</p>
-<ul>
-  <li>mesa: Make core Mesa allocate the texture renderbuffer wrapper.</li>
-  <li>mesa: Make gl_renderbuffers backed by EGL images use FinishRenderTexture.</li>
-  <li>i965/fs: Bake regs_written into the IR instead of recomputing it later.</li>
-  <li>i965/vs: Fix implied_mrf_writes() for integer division pre-gen6.</li>
-  <li>intel: Add support for writing to our linear-temporary-CPU-map case.</li>
-  <li>intel: Do temporary CPU maps of textures that are too big to GTT map.</li>
-  <li>intel: Avoid making tiled miptrees we won't be able to blit.</li>
-  <li>intel: Fix MRT handling of glBitmap().</li>
-  <li>intel: Fix format handling of blit glBitmap()</li>
-  <li>i965: Shut up the last release build warning.</li>
-</ul>
-
-<p>Fabian Bieler (2):</p>
-<ul>
-  <li>mesa/st: Don't copy propagate from swizzles.</li>
-  <li>mesa/program: Don't copy propagate from swizzles.</li>
-</ul>
-
-<p>Frank Henigman (1):</p>
-<ul>
-  <li>intel: initialize fs_visitor::params_remap in constructor</li>
-</ul>
-
-<p>Ian Romanick (2):</p>
-<ul>
-  <li>docs: Add 9.1.3 release md5sums</li>
-  <li>mesa: Bump version to 9.1.4</li>
-</ul>
-
-<p>José Fonseca (1):</p>
-<ul>
-  <li>scons: Fix implicit python dependency discovery on Windows.</li>
-</ul>
-
-<p>Kenneth Graunke (17):</p>
-<ul>
-  <li>mesa: Add i965 varying index patches to .cherry-ignore.</li>
-  <li>i965: Turn brw-&gt;urb.vs_size and gs_size into local variables.</li>
-  <li>i965: Use a variable for the push constant size in kB.</li>
-  <li>i965: Update URB partitioning code for Haswell's GT3 variant.</li>
-  <li>i965: Add chipset limits for the Haswell GT3 variant.</li>
-  <li>i965: Enable the Bay Trail platform.</li>
-  <li>mesa: Add a reverted commit to cherry-ignore.</li>
-  <li>vbo: Ignore PRIMITIVE_RESTART_FIXED_INDEX for glDrawArrays().</li>
-  <li>mesa: Add a helper function for determining the restart index.</li>
-  <li>vbo: Use the new primitive restart index helper function.</li>
-  <li>i965: Use the correct restart index for fixed index mode on Haswell.</li>
-  <li>mesa: Cherry-ignore a patch that got picked but squashed.</li>
-  <li>i965: Fix can_cut_index_handle_restart_index() for byte/short types.</li>
-  <li>st/mesa: Go back to using ctx-&gt;Array.RestartIndex, not _RestartIndex.</li>
-  <li>mesa: Ignore fixed-index primitive restart in ArrayElement().</li>
-  <li>mesa: Delete the ctx-&gt;Array._RestartIndex derived state.</li>
-  <li>glsl: Bail on parsing if the #version directive is bogus.</li>
-</ul>
-
-<p>Lauri Kasanen (1):</p>
-<ul>
-  <li>r600g: Correctly initialize the shader key, v2</li>
-</ul>
-
-<p>Maarten Lankhorst (4):</p>
-<ul>
-  <li>nvc0: fix up video buffer alignment requirements</li>
-  <li>nvc0: kill assert in ppp code</li>
-  <li>nvc0: set rsvd_kick correctly</li>
-  <li>nvc0: allow frame dropping in h264</li>
-</ul>
-
-<p>Marek Olšák (7):</p>
-<ul>
-  <li>radeonsi: increase array size for shader inputs and outputs</li>
-  <li>vbo: fix possible use-after-free segfault after a VAO is deleted</li>
-  <li>glsl: fix the value of gl_MaxFragmentUniformVectors</li>
-  <li>st/mesa: initialize all program constants and UBO limits</li>
-  <li>st/mesa: initialize Const.MaxColorAttachments</li>
-  <li>st/mesa: fix a couple of issues in st_bind_ubos</li>
-  <li>mesa: declare UniformBufferBindings as an array with a static size</li>
-</ul>
-
-<p>Matt Turner (3):</p>
-<ul>
-  <li>configure.ac: Remove redundant checks of enable_dri.</li>
-  <li>configure.ac: Build dricommon for DRI gallium drivers</li>
-  <li>i965: NULL check depth_mt to quiet static analysis.</li>
-</ul>
-
-<p>Michel Dänzer (3):</p>
-<ul>
-  <li>radeonsi: Fix handling of TGSI_SEMANTIC_PSIZE</li>
-  <li>radeonsi: Fix user clip planes</li>
-  <li>mesa: Note that two radeonsi fixes cannot be backported after all</li>
-</ul>
-
-<p>Mike Stroyan (1):</p>
-<ul>
-  <li>configure.ac: Build dricommon for gallium swrast</li>
-</ul>
-
-<p>Naohiro Aota (1):</p>
-<ul>
-  <li>xmlpool/build: Make sure to set mo properly</li>
-</ul>
-
-<p>Paul Berry (2):</p>
-<ul>
-  <li>glsl: Fix error checking on "flat" keyword to match GLSL ES 3.00, GLSL 1.50.</li>
-  <li>i965/gen7.5: Allow HW primitive restart for all primitive types.</li>
-</ul>
-
-<p>Paulo Zanoni (1):</p>
-<ul>
-  <li>i965: make GT3 machines work as GT3 instead of GT2</li>
-</ul>
-
-<p>Rodrigo Vivi (2):</p>
-<ul>
-  <li>i965: Add missing Haswell GT3 Desktop to IS_HSW_GT3 check.</li>
-  <li>i965: Adding more reserved PCI IDs for Haswell.</li>
-</ul>
-
-<p>Roland Scheidegger (1):</p>
-<ul>
-  <li>gallivm: fix out-of-bounds access with mirror_clamp_to_edge address mode</li>
-</ul>
-
-<p>Stéphane Marchesin (2):</p>
-<ul>
-  <li>st/xlib: Fix upside down coordinates for CopySubBuffer</li>
-  <li>st/xlib: Flush the front buffer before doing CopySubBuffer</li>
-</ul>
-
-<p>Sven Joachim (1):</p>
-<ul>
-  <li>mesa: Fix ieee fp on Alpha</li>
-</ul>
-
-<p>Tapani Pälli (1):</p>
-<ul>
-  <li>mesa: fix type comparison errors in sub-texture error checking code</li>
-</ul>
-
-<p>Tom Stellard (2):</p>
-<ul>
-  <li>gallivm: Fix build with LLVM &gt;= r180063</li>
-  <li>r300g/compiler: Prevent regalloc from swizzling texture operands v2</li>
-</ul>
-
-<p>Vinson Lee (1):</p>
-<ul>
-  <li>radeon: Initialize variables in radeon_llvm_context_init.</li>
-</ul>
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/9.1.5.html
+++ b/docs/relnotes/9.1.5.html
@@ -1,140 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 9.1.5 Release Notes / July 17, 2013</h1>
-
-<p>
-Mesa 9.1.5 is a bug fix release which fixes bugs found since the 9.1.4 release.
-</p>
-<p>
-Mesa 9.1 implements the OpenGL 3.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 3.1.  OpenGL
-3.1 is <strong>only</strong> available if requested at context creation
-because GL_ARB_compatibility is not supported.
-</p>
-
-<h2>MD5 checksums</h2>
-<pre>
-4ed2af5943141a85a21869053a2fc2eb  MesaLib-9.1.5.tar.bz2
-47181066acf3231d74e027b2033f9455  MesaLib-9.1.5.tar.gz
-4c9c6615bd99215325250f87ed34058f  MesaLib-9.1.5.zip
-</pre>
-
-<h2>New features</h2>
-<p>None.</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=58384">Bug 58384</a> - [i965 Bisected]Oglc max_values(advanced.fragmentProgram.GL_MAX_PROGRAM_ENV_PARAMETERS_ARB) segfault</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=62647">Bug 62647</a> - Wrong rendering of Dota 2 on Wine (apitrace attached) - Intel IVB HD4000</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63674">Bug 63674</a> - [IVB]frozen at the first frame when run Unigine-heaven 4.0</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65910">Bug 65910</a> - Killing weston-launch causes segv in desktop-shell</li>
-
-</ul>
-
-<h2>Changes</h2>
-<p>The full set of changes can be viewed by using the following GIT command:</p>
-
-<pre>
-  git log mesa-9.1.4..mesa-9.1.5
-</pre>
-
-<p>Anuj Phogat (1):</p>
-<ul>
-  <li>mesa: Return ZeroVec/dummyReg instead of NULL pointer</li>
-</ul>
-
-<p>Brian Paul (1):</p>
-<ul>
-  <li>svga: check for NaN shader immediates</li>
-</ul>
-
-<p>Carl Worth (3):</p>
-<ul>
-  <li>cherry-ignore: Ignore previously backported patch</li>
-  <li>cherry-ignore: Drop two patches which we've decided not to include</li>
-  <li>mesa: Bump version to 9.1.5</li>
-</ul>
-
-<p>Chris Forbes (1):</p>
-<ul>
-  <li>i965: fix alpha test for MRT</li>
-</ul>
-
-<p>Christoph Bumiller (1):</p>
-<ul>
-  <li>r600g: x/y coordinates must be divided by block dim in dma blit</li>
-</ul>
-
-<p>Eric Anholt (1):</p>
-<ul>
-  <li>ra: Fix register spilling.</li>
-</ul>
-
-<p>Ian Romanick (6):</p>
-<ul>
-  <li>docs: Add 9.1.4 release md5sums</li>
-  <li>glsl: Add a gl_shader_program parameter to _mesa_uniform_{merge,split}_location_offset</li>
-  <li>glsl: Add gl_shader_program::UniformLocationBaseScale</li>
-  <li>glsl: Generate smaller values for uniform locations</li>
-  <li>i965: Be more careful with the interleaved user array upload optimization</li>
-  <li>glsl: Move all var decls to the front of the IR list in reverse order</li>
-</ul>
-
-<p>Kenneth Graunke (1):</p>
-<ul>
-  <li>glsl/builtins: Fix ARB_texture_cube_map_array built-in availability.</li>
-</ul>
-
-<p>Kristian Høgsberg (1):</p>
-<ul>
-  <li>wayland: Handle global_remove event as well</li>
-</ul>
-
-<p>Matt Turner (1):</p>
-<ul>
-  <li>register_allocate: Fix the type of best_benefit.</li>
-</ul>
-
-<p>Paul Berry (1):</p>
-<ul>
-  <li>glsl ES: Fix magnitude of gl_MaxVertexUniformVectors.</li>
-</ul>
-
-<p>Richard Sandiford (3):</p>
-<ul>
-  <li>st/xlib Fix XIMage bytes-per-pixel calculation</li>
-  <li>st/xlib: Fix XImage stride calculation</li>
-  <li>st/dri/sw: Fix pitch calculation in drisw_update_tex_buffer</li>
-</ul>
-
-<p>Vinson Lee (1):</p>
-<ul>
-  <li>swrast: Fix memory leak.</li>
-</ul>
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/9.1.6.html
+++ b/docs/relnotes/9.1.6.html
@@ -1,168 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 9.1.6 Release Notes / August 1, 2013</h1>
-
-<p>
-Mesa 9.1.6 is a bug fix release which fixes bugs found since the 9.1.5 release.
-</p>
-<p>
-Mesa 9.1 implements the OpenGL 3.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 3.1.  OpenGL
-3.1 is <strong>only</strong> available if requested at context creation
-because GL_ARB_compatibility is not supported.
-</p>
-
-<h2>MD5 checksums</h2>
-<pre>
-443a2a352667294b53d56cb1a74114e9  MesaLib-9.1.6.tar.bz2
-08d3069cccd6821e5f33e0840bca0718  MesaLib-9.1.6.tar.gz
-90aa7a6d9878cdbfcb055312f356d6b9  MesaLib-9.1.6.zip
-</pre>
-
-<h2>New features</h2>
-<p>None.</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=47824">Bug 47824</a> - osmesa using --enable-shared-glapi depends on libgl</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=62362">Bug 62362</a> - Crash when using Wayland EGL platform</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=63435">Bug 63435</a> - [Regression since 9.0] Flickering in EGL OpenGL full-screen window with swap interval 1</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64087">Bug 64087</a> - Webgl conformance shader-with-non-reserved-words crash when mesa is compiled without --enable-debug</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=64330">Bug 64330</a> - WebGL snake demo crash in loop_analysis.cpp:506: bool is_loop_terminator(ir_if*): assertion „inst != __null“ failed.</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65236">Bug 65236</a> - [i965] Rendering artifacts in VDrift/GL2</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66558">Bug 66558</a> - RS690: 3D artifacts when playing SuperTuxKart</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66847">Bug 66847</a> - compilation broken with llvm 3.3</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66850">Bug 66850</a> - glGenerateMipmap crashes when using GL_TEXTURE_2D_ARRAY with compressed internal format</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66921">Bug 66921</a> - [r300g] Heroes of Newerth: HiZ related corruption</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=67283">Bug 67283</a> - VDPAU doesn't work on hybrid laptop through DRI_PRIME</li>
-
-</ul>
-
-<h2>Changes</h2>
-<p>The full set of changes can be viewed by using the following GIT command:</p>
-
-<pre>
-  git log mesa-9.1.5..mesa-9.1.6
-</pre>
-
-<p>Andreas Boll (1):</p>
-<ul>
-  <li>configure.ac: Require llvm-3.2 for r600g/radeonsi llvm backends</li>
-</ul>
-
-<p>Brian Paul (4):</p>
-<ul>
-  <li>mesa: handle 2D texture arrays in get_tex_rgba_compressed()</li>
-  <li>meta: handle 2D texture arrays in decompress_texture_image()</li>
-  <li>mesa: implement mipmap generation for compressed 2D array textures</li>
-  <li>mesa: improve free() cleanup in generate_mipmap_compressed()</li>
-</ul>
-
-<p>Carl Worth (7):</p>
-<ul>
-  <li>docs: Add 9.1.5 release md5sums</li>
-  <li>Merge 'origin/9.1' into stable</li>
-  <li>cherry-ignore: Drop 13 patches from the pick list</li>
-  <li>get-pick-list.sh: Include commits mentionining "CC: mesa-stable..." in pick list</li>
-  <li>get-pick-list: Allow for non-whitespace between "CC:" and "mesa-stable"</li>
-  <li>get-pick-list: Ignore commits which CC mesa-stable unless they say "9.1"</li>
-  <li>Bump version to 9.1.6</li>
-</ul>
-
-<p>Chris Forbes (5):</p>
-<ul>
-  <li>i965/Gen4: Zero extra coordinates for ir_tex</li>
-  <li>i965/vs: Fix flaky texture swizzling</li>
-  <li>i965/vs: set up sampler state pointer for Gen4/5.</li>
-  <li>i965/vs: Put lod parameter in the correct place for Gen4</li>
-  <li>i965/vs: Gen4/5: enable front colors if back colors are written</li>
-</ul>
-
-<p>Christoph Bumiller (1):</p>
-<ul>
-  <li>nv50,nvc0: s/uint16/uint32 for constant buffer offset</li>
-</ul>
-
-<p>Dave Airlie (1):</p>
-<ul>
-  <li>gallium/vl: add prime support</li>
-</ul>
-
-<p>Eric Anholt (1):</p>
-<ul>
-  <li>egl: Restore "bogus" DRI2 invalidate event code.</li>
-</ul>
-
-<p>Jeremy Huddleston Sequoia (1):</p>
-<ul>
-  <li>Apple: glFlush() is not needed with CGLFlushDrawable()</li>
-</ul>
-
-<p>Kenneth Graunke (1):</p>
-<ul>
-  <li>glsl: Classify "layout" like other identifiers.</li>
-</ul>
-
-<p>Kristian Høgsberg (1):</p>
-<ul>
-  <li>egl-wayland: Fix left-over wl_display_roundtrip() usage</li>
-</ul>
-
-<p>Maarten Lankhorst (2):</p>
-<ul>
-  <li>osmesa: link against static libglapi library too to get the gl exports</li>
-  <li>nvc0: force use of correct firmware file</li>
-</ul>
-
-<p>Marek Olšák (4):</p>
-<ul>
-  <li>r300g/swtcl: fix geometry corruption by uploading indices to a buffer</li>
-  <li>r300g/swtcl: fix a lockup in MSAA resolve</li>
-  <li>Revert "r300g: allow HiZ with a 16-bit zbuffer"</li>
-  <li>r600g: increase array size for shader inputs and outputs</li>
-</ul>
-
-<p>Matt Turner (2):</p>
-<ul>
-  <li>i965: NULL check prog on shader compilation failure.</li>
-  <li>i965/vs: Print error if vertex shader fails to compile.</li>
-</ul>
-
-<p>Paul Berry (1):</p>
-<ul>
-  <li>glsl: Handle empty if statement encountered during loop analysis.</li>
-</ul>
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/9.2.html
+++ b/docs/relnotes/9.2.html
@@ -48,11 +48,9 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_ARB_texture_multisample</li>
 <li>GL_ARB_texture_storage_multisample</li>
 <li>GL_ARB_texture_query_lod</li>
-<li>Enable GL_ARB_texture_storage on radeon, r200, and nouveau</li>
 <li>Added new freedreno gallium driver</li>
 <li>OSMesa interface for gallium llvmpipe/softpipe drivers</li>
 <li>Gallium Heads-Up Display (HUD) feature for performance monitoring</li>
-<li>Added support for UVD (2.2 and 3.0) video decoding on r600g and radeonsi through VDPAU (requires Kernel 3.10 or later)</li>
 </ul>


@@ -65,14 +63,6 @@ Note: some of the new features are only available with certain drivers.

 <ul>
 <li>Removed d3d1x state tracker (unused, unmaintained and broken)</li>
-<li>Removed GL_EXT_clip_volume_hint because no driver had enabled it since
-2007.</li>
-<li>Removed GL_MESA_resize_buffers because it was only really implemented by
-the (unsupported) GDI driver.</li>
-<li>GL_EXT_separate_shader_objects has been removed from all Gallium drivers,
-    because it disallows a critical GLSL shader optimization.
-    GL_ARB_separate_shader_objects doesn't have this issue.</li>
-<li>i965 Gen6+ requires Kernel 3.6 or later. (92d2f5a)</li>
 </ul>

 </div>
--- a/docs/specs/OLD/MESA_resize_buffers.spec
+++ b/docs/specs/OLD/MESA_resize_buffers.spec
@@ -12,7 +12,7 @@ Contact

 Status

-    Obsolete.
+    Shipping (since Mesa version 2.2)

 Version

--- a/include/EGL/eglplatform.h
+++ b/include/EGL/eglplatform.h
@@ -109,8 +109,8 @@ typedef void                        *EGLNativeDisplayType;
 #ifdef MESA_EGL_NO_X11_HEADERS

 typedef void            *EGLNativeDisplayType;
-typedef khronos_uintptr_t EGLNativePixmapType;
-typedef khronos_uintptr_t EGLNativeWindowType;
+typedef khronos_uint32_t EGLNativePixmapType;
+typedef khronos_uint32_t EGLNativeWindowType;

 #else

--- a/include/GL/glext.h
+++ b/include/GL/glext.h
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -552,8 +552,6 @@ struct __DRIuseInvalidateExtensionRec {
 #define __DRI_ATTRIB_RGBA_BIT			0x01	
 #define __DRI_ATTRIB_COLOR_INDEX_BIT		0x02
 #define __DRI_ATTRIB_LUMINANCE_BIT		0x04
-#define __DRI_ATTRIB_FLOAT_BIT			0x08
-#define __DRI_ATTRIB_UNSIGNED_FLOAT_BIT		0x10

 /* __DRI_ATTRIB_CONFIG_CAVEAT */
 #define __DRI_ATTRIB_SLOW_BIT			0x01
@@ -985,6 +983,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FOURCC_YUV410	0x39565559
 #define __DRI_IMAGE_FOURCC_YUV411	0x31315559
 #define __DRI_IMAGE_FOURCC_YUV420	0x32315559
+#define __DRI_IMAGE_FOURCC_YVU420	0x32315659
 #define __DRI_IMAGE_FOURCC_YUV422	0x36315559
 #define __DRI_IMAGE_FOURCC_YUV444	0x34325559
 #define __DRI_IMAGE_FOURCC_NV12		0x3231564e
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -70,29 +70,3 @@ CHIPSET(0x6664, HAINAN_6664, HAINAN)
 CHIPSET(0x6665, HAINAN_6665, HAINAN)
 CHIPSET(0x6667, HAINAN_6667, HAINAN)
 CHIPSET(0x666F, HAINAN_666F, HAINAN)
-
-CHIPSET(0x6640, BONAIRE_6640, BONAIRE)
-CHIPSET(0x6641, BONAIRE_6641, BONAIRE)
-CHIPSET(0x6649, BONAIRE_6649, BONAIRE)
-CHIPSET(0x6650, BONAIRE_6650, BONAIRE)
-CHIPSET(0x6651, BONAIRE_6651, BONAIRE)
-CHIPSET(0x6658, BONAIRE_6658, BONAIRE)
-CHIPSET(0x665C, BONAIRE_665C, BONAIRE)
-CHIPSET(0x665D, BONAIRE_665D, BONAIRE)
-
-CHIPSET(0x9830, KABINI_9830, KABINI)
-CHIPSET(0x9831, KABINI_9831, KABINI)
-CHIPSET(0x9832, KABINI_9832, KABINI)
-CHIPSET(0x9833, KABINI_9833, KABINI)
-CHIPSET(0x9834, KABINI_9834, KABINI)
-CHIPSET(0x9835, KABINI_9835, KABINI)
-CHIPSET(0x9836, KABINI_9836, KABINI)
-CHIPSET(0x9837, KABINI_9837, KABINI)
-CHIPSET(0x9838, KABINI_9838, KABINI)
-CHIPSET(0x9839, KABINI_9839, KABINI)
-CHIPSET(0x983A, KABINI_983A, KABINI)
-CHIPSET(0x983B, KABINI_983B, KABINI)
-CHIPSET(0x983C, KABINI_983C, KABINI)
-CHIPSET(0x983D, KABINI_983D, KABINI)
-CHIPSET(0x983E, KABINI_983E, KABINI)
-CHIPSET(0x983F, KABINI_983F, KABINI)
--- a/m4/ax_prog_flex.m4
+++ b/m4/ax_prog_flex.m4
@@ -53,7 +53,7 @@ AC_DEFUN([AX_PROG_FLEX], [
  AC_REQUIRE([AC_PROG_EGREP])

  AC_CACHE_CHECK([if flex is the lexer generator],[ax_cv_prog_flex],[
-    AS_IF([$LEX --version 2>/dev/null | $EGREP -q '^\<flex\>'],
+    AS_IF([$LEX --version 2>/dev/null | $EGREP -q '^flex '],
      [ax_cv_prog_flex=yes], [ax_cv_prog_flex=no])
  ])
  AS_IF([test "$ax_cv_prog_flex" = "yes"],
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -29,10 +29,6 @@ if HAVE_DRI_GLX
 SUBDIRS += glx
 endif

-if HAVE_EGL_PLATFORM_WAYLAND
-SUBDIRS += egl/wayland
-endif
-
 if HAVE_GBM
 SUBDIRS += gbm
 endif
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -21,4 +21,8 @@

 SUBDIRS=

+if HAVE_EGL_PLATFORM_WAYLAND
+SUBDIRS += wayland
+endif
+
 SUBDIRS += drivers main
--- a/src/egl/drivers/dri2/Makefile.am
+++ b/src/egl/drivers/dri2/Makefile.am
@@ -28,7 +28,6 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/src/egl/wayland/wayland-drm \
 	-I$(top_builddir)/src/egl/wayland/wayland-drm \
 	$(DEFINES) \
-	$(VISIBILITY_CFLAGS) \
 	$(LIBDRM_CFLAGS) \
 	$(LIBUDEV_CFLAGS) \
 	$(LIBKMS_CFLAGS) \
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -75,7 +75,7 @@ EGLint dri2_to_egl_attribute_map[] = {
   0,				/* __DRI_ATTRIB_TRANSPARENT_GREEN_VALUE */
   0,				/* __DRI_ATTRIB_TRANSPARENT_BLUE_VALUE */
   0,				/* __DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE */
-   0,				/* __DRI_ATTRIB_FLOAT_MODE (deprecated) */
+   0,				/* __DRI_ATTRIB_FLOAT_MODE */
   0,				/* __DRI_ATTRIB_RED_MASK */
   0,				/* __DRI_ATTRIB_GREEN_MASK */
   0,				/* __DRI_ATTRIB_BLUE_MASK */
@@ -141,7 +141,7 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
 	 else if (value & __DRI_ATTRIB_LUMINANCE_BIT)
 	    value = EGL_LUMINANCE_BUFFER;
 	 else
-	    return NULL;
+	    /* not valid */;
 	 _eglSetConfigKey(&base, EGL_COLOR_BUFFER_TYPE, value);
 	 break;	 

--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -38,6 +38,7 @@
 #include <xf86drm.h>
 #include <i915_drm.h>
 #include <radeon_drm.h>
+#include <gralloc_drm.h>

 #include "egl_dri2.h"
 #include "gralloc_drm.h"
@@ -56,9 +57,9 @@ get_format_bpp(int native)
   case HAL_PIXEL_FORMAT_RGB_888:
      bpp = 3;
      break;
+   case HAL_PIXEL_FORMAT_DRM_NV12:
+   case HAL_PIXEL_FORMAT_YV12:
   case HAL_PIXEL_FORMAT_RGB_565:
-   case HAL_PIXEL_FORMAT_RGBA_5551:
-   case HAL_PIXEL_FORMAT_RGBA_4444:
      bpp = 2;
      break;
   default:
@@ -339,6 +340,7 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp, _EGLContext *ctx,
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri2_egl_image *dri2_img;
   int name;
+   uint32_t offsets[3], strides[3], handles[3], tmp;
   EGLint format;

   if (ctx != NULL) {
@@ -367,6 +369,12 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp, _EGLContext *ctx,

   /* see the table in droid_add_configs_for_visuals */
   switch (buf->format) {
+   case HAL_PIXEL_FORMAT_DRM_NV12:
+       format = __DRI_IMAGE_FOURCC_NV12;
+       break;
+   case HAL_PIXEL_FORMAT_YV12:
+      format = __DRI_IMAGE_FOURCC_YVU420;
+      break;
   case HAL_PIXEL_FORMAT_BGRA_8888:
      format = __DRI_IMAGE_FORMAT_ARGB8888;
      break;
@@ -380,8 +388,6 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp, _EGLContext *ctx,
      format = __DRI_IMAGE_FORMAT_XBGR8888;
      break;
   case HAL_PIXEL_FORMAT_RGB_888:
-   case HAL_PIXEL_FORMAT_RGBA_5551:
-   case HAL_PIXEL_FORMAT_RGBA_4444:
      /* unsupported */
   default:
      _eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", buf->format);
@@ -400,14 +406,70 @@ dri2_create_image_android_native_buffer(_EGLDisplay *disp, _EGLContext *ctx,
      return NULL;
   }

-   dri2_img->dri_image =
-      dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
-					   buf->width,
-					   buf->height,
-					   format,
-					   name,
-					   buf->stride,
-					   dri2_img);
+   switch (format) {
+   case __DRI_IMAGE_FORMAT_ARGB8888:
+   case __DRI_IMAGE_FORMAT_RGB565:
+   case __DRI_IMAGE_FORMAT_ABGR8888:
+   case __DRI_IMAGE_FORMAT_XBGR8888:
+       dri2_img->dri_image =
+          dri2_dpy->image->createImageFromName(dri2_dpy->dri_screen,
+                           buf->width,
+                           buf->height,
+                           format,
+                           name,
+                           buf->stride,
+                           dri2_img);
+       break;
+   case __DRI_IMAGE_FOURCC_YVU420:
+       offsets[0] = offsets[1] = offsets[2] = 0;
+       strides[0] = strides[1] = strides[2] = 0;
+
+       gralloc_drm_resolve_format(buf->handle, &strides[0], &offsets[0],
+                                  &handles[0]);
+
+       /* u and v are given in the opposite order from what we need here, so swap them: */
+       tmp = offsets[1];
+       offsets[1] = offsets[2];
+       offsets[2] = tmp;
+       tmp = strides[1];
+       strides[1] = strides[2];
+       strides[2] = tmp;
+
+       dri2_img->dri_image =
+          dri2_dpy->image->createImageFromNames(dri2_dpy->dri_screen,
+                           buf->width,
+                           buf->height,
+                           format,
+                           &name, 1,
+                           (int*)strides,
+                           (int*)offsets,
+                           dri2_img);
+       break;
+   case __DRI_IMAGE_FOURCC_NV12:
+       offsets[0] = offsets[1] = offsets[2] = 0;
+       strides[0] = strides[1] = strides[2] = 0;
+
+       gralloc_drm_resolve_format(buf->handle, &strides[0], &offsets[0],
+                                  &handles[0]);
+
+
+       dri2_img->dri_image =
+          dri2_dpy->image->createImageFromNames(dri2_dpy->dri_screen,
+                        buf->width,
+                        buf->height,
+                        format,
+                        &name, 1,
+                        (int*)strides,
+                        (int*)offsets,
+                        dri2_img);
+       break;
+   default:
+       /* We should never arrive here */
+      _eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x",
+              buf->format);
+      break;
+   }
+
   if (!dri2_img->dri_image) {
      free(dri2_img);
      _eglError(EGL_BAD_ALLOC, "droid_create_image_mesa_drm");
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -715,15 +715,8 @@ registry_handle_global(void *data, struct wl_registry *registry, uint32_t name,
   }
 }

-static void
-registry_handle_global_remove(void *data, struct wl_registry *registry,
-			      uint32_t name)
-{
-}
-
 static const struct wl_registry_listener registry_listener = {
-   registry_handle_global,
-   registry_handle_global_remove
+	registry_handle_global
 };

 EGLBoolean
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -212,7 +212,7 @@ dri2_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
 			dri2_surf->drawable, s.data->root,
 			dri2_surf->base.Width, dri2_surf->base.Height);
   } else {
-      dri2_surf->drawable = window;
+      dri2_surf->drawable = (xcb_drawable_t)window;
   }

   if (dri2_dpy->dri2) {
@@ -743,20 +743,6 @@ dri2_swap_buffers_msc(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw,
      free(reply);
   }

-   /* Since we aren't watching for the server's invalidate events like we're
-    * supposed to (due to XCB providing no mechanism for filtering the events
-    * the way xlib does), and SwapBuffers is a common cause of invalidate
-    * events, just shove one down to the driver, even though we haven't told
-    * the driver that we're the kind of loader that provides reliable
-    * invalidate events.  This causes the driver to request buffers again at
-    * its next draw, so that we get the correct buffers if a pageflip
-    * happened.  The driver should still be using the viewport hack to catch
-    * window resizes.
-    */
-   if (dri2_dpy->flush &&
-       dri2_dpy->flush->base.version >= 3 && dri2_dpy->flush->invalidate)
-      (*dri2_dpy->flush->invalidate)(dri2_surf->dri_drawable);
-
   return swap_count;
 }

@@ -850,10 +836,10 @@ dri2_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
   (*dri2_dpy->flush->flush)(dri2_surf->dri_drawable);

   gc = xcb_generate_id(dri2_dpy->conn);
-   xcb_create_gc(dri2_dpy->conn, gc, target, 0, NULL);
+   xcb_create_gc(dri2_dpy->conn, gc, (xcb_drawable_t)target, 0, NULL);
   xcb_copy_area(dri2_dpy->conn,
 		  dri2_surf->drawable,
-		  target,
+		  (xcb_drawable_t)target,
 		  gc,
 		  0, 0,
 		  0, 0,
--- a/src/egl/drivers/glx/Makefile.am
+++ b/src/egl/drivers/glx/Makefile.am
@@ -22,7 +22,6 @@
 AM_CFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/egl/main \
-	$(VISIBILITY_CFLAGS) \
 	$(X11_CFLAGS) \
 	$(DEFINES)

--- a/src/egl/main/Android.mk
+++ b/src/egl/main/Android.mk
@@ -121,11 +121,13 @@ endif
 # r300g/r600g/radeonsi
 ifneq ($(filter r300g r600g radeonsi, $(MESA_GPU_DRIVERS)),)
 gallium_DRIVERS += libmesa_winsys_radeon
+LOCAL_SHARED_LIBRARIES += libdrm_radeon
 ifneq ($(filter r300g, $(MESA_GPU_DRIVERS)),)
 gallium_DRIVERS += libmesa_pipe_r300
 endif
 ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
-gallium_DRIVERS += libmesa_pipe_r600
+gallium_DRIVERS += libmesa_pipe_r600 libmesa_pipe_radeon
+LOCAL_SHARED_LIBRARIES += libstlport
 endif
 ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
 gallium_DRIVERS += libmesa_pipe_radeonsi
--- a/src/egl/main/Makefile.am
+++ b/src/egl/main/Makefile.am
@@ -29,7 +29,6 @@ AM_CFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/gbm/main \
 	$(DEFINES) \
-	$(VISIBILITY_CFLAGS) \
 	$(EGL_CFLAGS) \
 	-D_EGL_NATIVE_PLATFORM=$(EGL_NATIVE_PLATFORM) \
 	-D_EGL_DRIVER_SEARCH_DIR=\"$(EGL_DRIVER_INSTALL_DIR)\" \
--- a/src/egl/wayland/wayland-drm/Makefile.am
+++ b/src/egl/wayland/wayland-drm/Makefile.am
@@ -1,7 +1,6 @@
 AM_CFLAGS = -I$(top_srcdir)/src/egl/main \
 	    -I$(top_srcdir)/include \
 	    $(DEFINES) \
-	    $(VISIBILITY_CFLAGS) \
 	    $(WAYLAND_CFLAGS) 

 noinst_LTLIBRARIES = libwayland-drm.la
--- a/src/egl/wayland/wayland-egl/Makefile.am
+++ b/src/egl/wayland/wayland-egl/Makefile.am
@@ -2,7 +2,6 @@ pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = wayland-egl.pc

 AM_CFLAGS = $(DEFINES) \
-	    $(VISIBILITY_CFLAGS) \
 	    $(WAYLAND_CFLAGS)

 lib_LTLIBRARIES = libwayland-egl.la
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -61,7 +61,7 @@ ifneq ($(filter r300g, $(MESA_GPU_DRIVERS)),)
 SUBDIRS += drivers/r300
 endif
 ifneq ($(filter r600g, $(MESA_GPU_DRIVERS)),)
-SUBDIRS += drivers/r600
+SUBDIRS += drivers/r600 drivers/radeon
 endif
 ifneq ($(filter radeonsi, $(MESA_GPU_DRIVERS)),)
 SUBDIRS += drivers/radeonsi
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -38,17 +38,13 @@ libgallium_la_SOURCES += \
 endif

 indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
-	$(MKDIR_P) indices
 	$(AM_V_GEN) $(PYTHON2) $< > $@

 indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
-	$(MKDIR_P) indices
 	$(AM_V_GEN) $(PYTHON2) $< > $@

 util/u_format_srgb.c: $(srcdir)/util/u_format_srgb.py
-	$(MKDIR_P) util
 	$(AM_V_GEN) $(PYTHON2) $< > $@

 util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
-	$(MKDIR_P) util
 	$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -44,7 +44,6 @@ C_SOURCES := \
 	hud/hud_fps.c \
        hud/hud_driver_query.c \
 	os/os_misc.c \
-	os/os_process.c \
 	os/os_time.c \
 	pipebuffer/pb_buffer_fenced.c \
 	pipebuffer/pb_buffer_malloc.c \
@@ -164,7 +163,6 @@ GENERATED_SOURCES := \

 GALLIVM_SOURCES := \
        gallivm/lp_bld_arit.c \
-        gallivm/lp_bld_arit_overflow.c \
        gallivm/lp_bld_assert.c \
        gallivm/lp_bld_bitarit.c \
        gallivm/lp_bld_const.c \
@@ -173,7 +171,6 @@ GALLIVM_SOURCES := \
        gallivm/lp_bld_format_aos.c \
        gallivm/lp_bld_format_aos_array.c \
 	gallivm/lp_bld_format_float.c \
-        gallivm/lp_bld_format_srgb.c \
        gallivm/lp_bld_format_soa.c \
        gallivm/lp_bld_format_yuv.c \
        gallivm/lp_bld_gather.c \
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -111,7 +111,6 @@ struct cso_context {
   void *velements, *velements_saved;
   struct pipe_query *render_condition, *render_condition_saved;
   uint render_condition_mode, render_condition_mode_saved;
-   boolean render_condition_cond, render_condition_cond_saved;

   struct pipe_clip_state clip;
   struct pipe_clip_state clip_saved;
@@ -724,17 +723,13 @@ void cso_restore_stencil_ref(struct cso_context *ctx)
 }

 void cso_set_render_condition(struct cso_context *ctx,
-                              struct pipe_query *query,
-                              boolean condition, uint mode)
+                              struct pipe_query *query, uint mode)
 {
   struct pipe_context *pipe = ctx->pipe;

-   if (ctx->render_condition != query ||
-       ctx->render_condition_mode != mode ||
-       ctx->render_condition_cond != condition) {
-      pipe->render_condition(pipe, query, condition, mode);
+   if (ctx->render_condition != query || ctx->render_condition_mode != mode) {
+      pipe->render_condition(pipe, query, mode);
      ctx->render_condition = query;
-      ctx->render_condition_cond = condition;
      ctx->render_condition_mode = mode;
   }
 }
@@ -742,14 +737,12 @@ void cso_set_render_condition(struct cso_context *ctx,
 void cso_save_render_condition(struct cso_context *ctx)
 {
   ctx->render_condition_saved = ctx->render_condition;
-   ctx->render_condition_cond_saved = ctx->render_condition_cond;
   ctx->render_condition_mode_saved = ctx->render_condition_mode;
 }

 void cso_restore_render_condition(struct cso_context *ctx)
 {
   cso_set_render_condition(ctx, ctx->render_condition_saved,
-                            ctx->render_condition_cond_saved,
                            ctx->render_condition_mode_saved);
 }

--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -170,8 +170,7 @@ void cso_save_stencil_ref(struct cso_context *cso);
 void cso_restore_stencil_ref(struct cso_context *cso);

 void cso_set_render_condition(struct cso_context *cso,
-                              struct pipe_query *query,
-                              boolean condition, uint mode);
+                              struct pipe_query *query, uint mode);
 void cso_save_render_condition(struct cso_context *cso);
 void cso_restore_render_condition(struct cso_context *cso);

--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -58,7 +58,7 @@ draw_get_option_use_llvm(void)

 #ifdef PIPE_ARCH_X86
      util_cpu_detect();
-      /* require SSE2 due to LLVM PR6960. XXX Might be fixed by now? */
+      /* require SSE2 due to LLVM PR6960. */
      if (!util_cpu_caps.has_sse2)
         value = FALSE;
 #endif
@@ -78,9 +78,6 @@ draw_create_context(struct pipe_context *pipe, boolean try_llvm)
   if (draw == NULL)
      goto err_out;

-   /* we need correct cpu caps for disabling denorms in draw_vbo() */
-   util_cpu_detect();
-
 #if HAVE_LLVM
   if (try_llvm && draw_get_option_use_llvm()) {
      draw->llvm = draw_llvm_create(draw);
@@ -141,7 +138,6 @@ boolean draw_init(struct draw_context *draw)
   draw->clip_z = TRUE;

   draw->pt.user.planes = (float (*) [DRAW_TOTAL_CLIP_PLANES][4]) &(draw->plane[0]);
-   draw->pt.user.eltMax = ~0;

   if (!draw_pipeline_init( draw ))
      return FALSE;
@@ -742,7 +738,6 @@ draw_current_shader_clipvertex_output(const struct draw_context *draw)
 uint
 draw_current_shader_clipdistance_output(const struct draw_context *draw, int index)
 {
-   debug_assert(index < PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
   if (draw->gs.geometry_shader)
      return draw->gs.geometry_shader->clipdistance_output[index];
   return draw->vs.clipdistance_output[index];
@@ -761,7 +756,6 @@ draw_current_shader_num_written_clipdistances(const struct draw_context *draw)
 uint
 draw_current_shader_culldistance_output(const struct draw_context *draw, int index)
 {
-   debug_assert(index < PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
   if (draw->gs.geometry_shader)
      return draw->gs.geometry_shader->culldistance_output[index];
   return draw->vs.vertex_shader->culldistance_output[index];
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -792,13 +792,13 @@ draw_create_geometry_shader(struct draw_context *draw,
      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX)
         gs->viewport_index_output = i;
      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
-         debug_assert(gs->info.output_semantic_index[i] <
-                      PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
-         gs->clipdistance_output[gs->info.output_semantic_index[i]] = i;
+         if (gs->info.output_semantic_index[i] == 0)
+            gs->clipdistance_output[0] = i;
+         else
+            gs->clipdistance_output[1] = i;
      }
      if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CULLDIST) {
-         debug_assert(gs->info.output_semantic_index[i] <
-                      PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
+         debug_assert(gs->info.output_semantic_index[i] < Elements(gs->culldistance_output));
         gs->culldistance_output[gs->info.output_semantic_index[i]] = i;
      }
   }
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -67,8 +67,8 @@ struct draw_geometry_shader {
   struct tgsi_shader_info info;
   unsigned position_output;
   unsigned viewport_index_output;
-   unsigned clipdistance_output[PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT];
-   unsigned culldistance_output[PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT];
+   unsigned clipdistance_output[2];
+   unsigned culldistance_output[2];

   unsigned max_output_vertices;
   unsigned primitive_boundary;
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -32,7 +32,6 @@
 #include "draw_gs.h"

 #include "gallivm/lp_bld_arit.h"
-#include "gallivm/lp_bld_arit_overflow.h"
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_const.h"
 #include "gallivm/lp_bld_swizzle.h"
@@ -674,7 +673,6 @@ generate_vs(struct draw_llvm_variant *variant,

 static void
 generate_fetch(struct gallivm_state *gallivm,
-               struct draw_context *draw,
               LLVMValueRef vbuffers_ptr,
               LLVMValueRef *res,
               struct pipe_vertex_element *velem,
@@ -697,58 +695,35 @@ generate_fetch(struct gallivm_state *gallivm,
   LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
   LLVMValueRef stride;
   LLVMValueRef buffer_overflowed;
-   LLVMValueRef needed_buffer_size;
   LLVMValueRef temp_ptr =
      lp_build_alloca(gallivm,
                      lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
-   LLVMValueRef ofbit = NULL;
   struct lp_build_if_state if_ctx;

   if (velem->instance_divisor) {
-      /* Index is equal to the start instance plus the number of current 
-       * instance divided by the divisor. In this case we compute it as:
-       * index = start_instance + ((instance_id - start_instance) / divisor)
-       */
-      LLVMValueRef current_instance;
-      index = lp_build_const_int32(gallivm, draw->start_instance);
-      current_instance = LLVMBuildSub(builder, instance_id, index, "");
-      current_instance = LLVMBuildUDiv(builder, current_instance,
-                                       lp_build_const_int32(gallivm, velem->instance_divisor),
-                                       "instance_divisor");
-      index = LLVMBuildAdd(builder, index, current_instance, "instance");
+      /* array index = instance_id / instance_divisor */
+      index = LLVMBuildUDiv(builder, instance_id,
+                            lp_build_const_int32(gallivm, velem->instance_divisor),
+                            "instance_divisor");
   }

-   stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
-   stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
-   stride = lp_build_uadd_overflow(
-      gallivm, stride,
-      lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
-   needed_buffer_size = lp_build_uadd_overflow(
-      gallivm, stride,
-      lp_build_const_int32(gallivm,
-                           util_format_get_blocksize(velem->src_format)),
-      &ofbit);
+   stride = LLVMBuildMul(builder, vb_stride, index, "");

-   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
-                                     needed_buffer_size, buffer_size,
+   stride = LLVMBuildAdd(builder, stride,
+                         vb_buffer_offset,
+                         "");
+   stride = LLVMBuildAdd(builder, stride,
+                         lp_build_const_int32(gallivm, velem->src_offset),
+                         "");
+
+   buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
+                                     stride, buffer_size,
                                     "buffer_overflowed");
-   buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
-#if 0
-   lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
-                   index, vb_stride);
-   lp_build_printf(gallivm, "   vb_buffer_offset = %u, src_offset is %u\n",
-                   vb_buffer_offset,
-                   lp_build_const_int32(gallivm, velem->src_offset));
-   lp_build_print_value(gallivm, "   blocksize = ",
-                        lp_build_const_int32(
-                           gallivm,
-                           util_format_get_blocksize(velem->src_format)));
-   lp_build_printf(gallivm, "   instance_id = %u\n", instance_id);
-   lp_build_printf(gallivm, "   stride = %u\n", stride);
-   lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
-   lp_build_printf(gallivm, "   needed_buffer_size = %u\n", needed_buffer_size);
+   /*
+   lp_build_printf(gallivm, "vbuf index = %d, stride is %d\n", indices, stride);
+   lp_build_print_value(gallivm, "   buffer size = ", buffer_size);
   lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
-#endif
+   */

   lp_build_if(&if_ctx, gallivm, buffer_overflowed);
   {
@@ -1620,7 +1595,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
   if (elts) {
      start = zero;
      end = fetch_count;
-      count = fetch_count;
   }
   else {
      end = lp_build_add(&bld, start, count);
@@ -1630,7 +1604,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,

   fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");

-   lp_build_loop_begin(&lp_loop, gallivm, zero);
+   lp_build_loop_begin(&lp_loop, gallivm, start);
   {
      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
@@ -1638,7 +1612,10 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
      LLVMValueRef clipmask;   /* holds the clipmask value */
      const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];

-      io_itr = lp_loop.counter;
+      if (elts)
+         io_itr = lp_loop.counter;
+      else
+         io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");

      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
 #if DEBUG_STORE
@@ -1651,7 +1628,6 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
            LLVMBuildAdd(builder,
                         lp_loop.counter,
                         lp_build_const_int32(gallivm, i), "");
-         true_index = LLVMBuildAdd(builder, start, true_index, "");

         /* make sure we're not out of bounds which can happen
          * if fetch_count % 4 != 0, because on the last iteration
@@ -1671,7 +1647,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                  gallivm,
                  lp_build_vec_type(gallivm, lp_type_int(32)), "");
            struct lp_build_if_state if_ctx;
-            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
+            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGE,
                                             true_index, fetch_elt_max,
                                             "index_overflowed");
            
@@ -1705,7 +1681,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
            LLVMValueRef vb_index =
               lp_build_const_int32(gallivm, velem->vertex_buffer_index);
            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
-            generate_fetch(gallivm, draw, vbuffers_ptr,
+            generate_fetch(gallivm, vbuffers_ptr,
                           &aos_attribs[j][i], velem, vb, true_index,
                           system_values.instance_id);
         }
@@ -1768,7 +1744,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                     vs_info->num_outputs, vs_type,
                     have_clipdist);
   }
-   lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
+
+   lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE);

   sampler->destroy(sampler);

--- a/src/gallium/auxiliary/draw/draw_llvm_sample.c
+++ b/src/gallium/auxiliary/draw/draw_llvm_sample.c
@@ -238,7 +238,6 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
                                       const struct lp_derivatives *derivs,
                                       LLVMValueRef lod_bias, /* optional */
                                       LLVMValueRef explicit_lod, /* optional */
-                                       boolean scalar_lod,
                                       LLVMValueRef *texel)
 {
   struct draw_llvm_sampler_soa *sampler = (struct draw_llvm_sampler_soa *)base;
@@ -257,7 +256,7 @@ draw_llvm_sampler_soa_emit_fetch_texel(const struct lp_build_sampler_soa *base,
                       coords,
                       offsets,
                       derivs,
-                       lod_bias, explicit_lod, scalar_lod,
+                       lod_bias, explicit_lod,
                       texel);
 }

--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -831,12 +831,7 @@ static struct aaline_stage *
 aaline_stage_from_pipe(struct pipe_context *pipe)
 {
   struct draw_context *draw = (struct draw_context *) pipe->draw;
-
-   if (draw) {
-      return aaline_stage(draw->pipeline.aaline);
-   } else {
-      return NULL;
-   }
+   return aaline_stage(draw->pipeline.aaline);
 }


@@ -849,12 +844,7 @@ aaline_create_fs_state(struct pipe_context *pipe,
                       const struct pipe_shader_state *fs)
 {
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
-   struct aaline_fragment_shader *aafs = NULL;
-
-   if (aaline == NULL)
-      return NULL;
-
-   aafs = CALLOC_STRUCT(aaline_fragment_shader);
+   struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader);

   if (aafs == NULL)
      return NULL;
@@ -874,10 +864,6 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs)
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;

-   if (aaline == NULL) {
-      return;
-   }
-
   /* save current */
   aaline->fs = aafs;
   /* pass-through */
@@ -891,19 +877,14 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs)
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs;

-   if (aafs == NULL) {
-      return;
-   }
+   /* pass-through */
+   aaline->driver_delete_fs_state(pipe, aafs->driver_fs);

-   if (aaline != NULL) {
-      /* pass-through */
-      aaline->driver_delete_fs_state(pipe, aafs->driver_fs);
-
-      if (aafs->aaline_fs)
-         aaline->driver_delete_fs_state(pipe, aafs->aaline_fs);
-   }
+   if (aafs->aaline_fs)
+      aaline->driver_delete_fs_state(pipe, aafs->aaline_fs);

   FREE((void*)aafs->state.tokens);
+
   FREE(aafs);
 }

@@ -914,10 +895,6 @@ aaline_bind_sampler_states(struct pipe_context *pipe,
 {
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);

-   if (aaline == NULL) {
-      return;
-   }
-
   /* save current */
   memcpy(aaline->state.sampler, sampler, num * sizeof(void *));
   aaline->num_samplers = num;
@@ -935,10 +912,6 @@ aaline_set_sampler_views(struct pipe_context *pipe,
   struct aaline_stage *aaline = aaline_stage_from_pipe(pipe);
   uint i;

-   if (aaline == NULL) {
-      return;
-   }
-
   /* save current */
   for (i = 0; i < num; i++) {
      pipe_sampler_view_reference(&aaline->state.sampler_views[i], views[i]);
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -308,9 +308,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
      ctx->emit_instruction(ctx, &newInst);

-      /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
+      /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
      newInst = tgsi_default_full_instruction();
-      newInst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
+      newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
      newInst.Instruction.NumDstRegs = 0;
      newInst.Instruction.NumSrcRegs = 1;
      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -1,5 +1,5 @@
 /**************************************************************************
- *
+ * 
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
@@ -10,11 +10,11 @@
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
- *
+ * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
- *
+ * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ * 
 **************************************************************************/

 /**
@@ -51,10 +51,10 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
   return (struct cull_stage *)stage;
 }

-static INLINE boolean
-cull_distance_is_out(float dist)
+static INLINE
+boolean cull_distance_is_out(float dist)
 {
-   return (dist < 0.0f) || util_is_inf_or_nan(dist);
+   return (dist < 0) || util_is_inf_or_nan(dist);
 }

 /*
@@ -68,21 +68,23 @@ static void cull_point( struct draw_stage *stage,
 {
   const unsigned num_written_culldistances =
      draw_current_shader_num_written_culldistances(stage->draw);
-   unsigned i;

-   debug_assert(num_written_culldistances);
-
-   for (i = 0; i < num_written_culldistances; ++i) {
-      unsigned cull_idx = i / 4;
-      unsigned out_idx =
-         draw_current_shader_culldistance_output(stage->draw, cull_idx);
-      unsigned idx = i % 4;
-      float cull1 = header->v[0]->data[out_idx][idx];
-      boolean vert1_out = cull_distance_is_out(cull1);
-      if (vert1_out)
-         return;
+   if (num_written_culldistances) {
+      unsigned i;
+      boolean culled = FALSE;
+      for (i = 0; i < num_written_culldistances; ++i) {
+         unsigned cull_idx = i / 4;
+         unsigned out_idx =
+            draw_current_shader_culldistance_output(stage->draw, cull_idx);
+         unsigned idx = i % 4;
+         float cull1 = header->v[0]->data[out_idx][idx];
+         boolean vert1_out = cull_distance_is_out(cull1);
+         if (vert1_out)
+            culled = TRUE;
+      }
+      if (!culled)
+         stage->next->point( stage->next, header );
   }
-   stage->next->point( stage->next, header );
 }

 /*
@@ -92,27 +94,29 @@ static void cull_point( struct draw_stage *stage,
 * on primitives without faces (e.g. points and lines)
 */
 static void cull_line( struct draw_stage *stage,
-                       struct prim_header *header )
+		      struct prim_header *header )
 {
   const unsigned num_written_culldistances =
      draw_current_shader_num_written_culldistances(stage->draw);
-   unsigned i;

-   debug_assert(num_written_culldistances);
-
-   for (i = 0; i < num_written_culldistances; ++i) {
-      unsigned cull_idx = i / 4;
-      unsigned out_idx =
-         draw_current_shader_culldistance_output(stage->draw, cull_idx);
-      unsigned idx = i % 4;
-      float cull1 = header->v[0]->data[out_idx][idx];
-      float cull2 = header->v[1]->data[out_idx][idx];
-      boolean vert1_out = cull_distance_is_out(cull1);
-      boolean vert2_out = cull_distance_is_out(cull2);
-      if (vert1_out && vert2_out)
-         return;
+   if (num_written_culldistances) {
+      unsigned i;
+      boolean culled = FALSE;
+      for (i = 0; i < num_written_culldistances; ++i) {
+         unsigned cull_idx = i / 4;
+         unsigned out_idx =
+            draw_current_shader_culldistance_output(stage->draw, cull_idx);
+         unsigned idx = i % 4;
+         float cull1 = header->v[0]->data[out_idx][idx];
+         float cull2 = header->v[1]->data[out_idx][idx];
+         boolean vert1_out = cull_distance_is_out(cull1);
+         boolean vert2_out = cull_distance_is_out(cull2);
+         if (vert1_out && vert2_out)
+            culled = TRUE;
+      }
+      if (!culled)
+         stage->next->line( stage->next, header );
   }
-   stage->next->line( stage->next, header );
 }

 /*
@@ -129,6 +133,7 @@ static void cull_tri( struct draw_stage *stage,
   /* Do the distance culling */
   if (num_written_culldistances) {
      unsigned i;
+      boolean culled = FALSE;
      for (i = 0; i < num_written_culldistances; ++i) {
         unsigned cull_idx = i / 4;
         unsigned out_idx =
@@ -141,8 +146,10 @@ static void cull_tri( struct draw_stage *stage,
         boolean vert2_out = cull_distance_is_out(cull2);
         boolean vert3_out = cull_distance_is_out(cull3);
         if (vert1_out && vert2_out && vert3_out)
-            return;
+            culled = TRUE;
      }
+      if (!culled)
+         stage->next->tri( stage->next, header );
   }

   /* Do the regular face culling */
@@ -159,7 +166,7 @@ static void cull_tri( struct draw_stage *stage,
      const float fx = v1[0] - v2[0];
      const float fy = v1[1] - v2[1];

-
+   
      /* det = cross(e,f).z */
      header->det = ex * fy - ey * fx;

@@ -210,7 +217,7 @@ static void cull_first_line( struct draw_stage *stage,
   }
 }

-static void cull_first_tri( struct draw_stage *stage,
+static void cull_first_tri( struct draw_stage *stage, 
 			    struct prim_header *header )
 {
   struct cull_stage *cull = cull_stage(stage);
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -278,7 +278,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,


      /* 
-       * Insert new MUL/TEX/KILL_IF instructions at start of program
+       * Insert new MUL/TEX/KIL instructions at start of program
       * Take gl_FragCoord, divide by 32 (stipple size), sample the
       * texture and kill fragment if needed.
       *
@@ -315,9 +315,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
      newInst.Src[1].Register.Index = pctx->freeSampler;
      ctx->emit_instruction(ctx, &newInst);

-      /* KILL_IF -texTemp;   # if -texTemp < 0, KILL fragment */
+      /* KIL -texTemp;   # if -texTemp < 0, KILL fragment */
      newInst = tgsi_default_full_instruction();
-      newInst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
+      newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
      newInst.Instruction.NumDstRegs = 0;
      newInst.Instruction.NumSrcRegs = 1;
      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
@@ -402,7 +402,7 @@ pstip_update_texture(struct pstip_stage *pstip)
   /*
    * Load alpha texture.
    * Note: 0 means keep the fragment, 255 means kill it.
-    * We'll negate the texel value and use KILL_IF which kills if value
+    * We'll negate the texel value and use KIL which kills if value
    * is negative.
    */
   for (i = 0; i < 32; i++) {
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -138,7 +138,7 @@ emit_vertex( struct vbuf_stage *vbuf,
      /* Note: we really do want data[0] here, not data[pos]: 
       */
      vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0, ~0);
-      vbuf->translate->run(vbuf->translate, 0, 1, 0, 0, vbuf->vertex_ptr);
+      vbuf->translate->run(vbuf->translate, 0, 1, 0, vbuf->vertex_ptr);

      if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr);
      
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -55,10 +55,6 @@ struct gallivm_state;
 /** Sum of frustum planes and user-defined planes */
 #define DRAW_TOTAL_CLIP_PLANES (6 + PIPE_MAX_CLIP_PLANES)

-/**
- * The largest possible index of a vertex that can be fetched.
- */
-#define DRAW_MAX_FETCH_IDX 0xffffffff

 struct pipe_context;
 struct draw_vertex_shader;
@@ -310,7 +306,6 @@ struct draw_context
   } extra_shader_outputs;

   unsigned instance_id;
-   unsigned start_instance;

 #ifdef HAVE_LLVM
   struct draw_llvm *llvm;
@@ -472,13 +467,14 @@ void
 draw_stats_clipper_primitives(struct draw_context *draw,
                              const struct draw_prim_info *prim_info);

+
 /** 
 * Return index i from the index buffer.
 * If the index buffer would overflow we return the
- * maximum possible index.
+ * index of the first element in the vb.
 */
 #define DRAW_GET_IDX(_elts, _i)                   \
-   (((_i) >= draw->pt.user.eltMax) ? DRAW_MAX_FETCH_IDX : (_elts)[_i])
+   (((_i) >= draw->pt.user.eltMax) ? 0 : (_elts)[_i])

 /**
 * Return index of the given viewport clamping it
@@ -490,20 +486,5 @@ draw_clamp_viewport_idx(int idx)
   return ((PIPE_MAX_VIEWPORTS > idx || idx < 0) ? idx : 0);
 }

-/**
- * Adds two unsigned integers and if the addition
- * overflows then it returns the value from
- * from the overflow_value variable.
- */
-static INLINE unsigned
-draw_overflow_uadd(unsigned a, unsigned b,
-                   unsigned overflow_value)
-{
-   unsigned res = a + b;
-   if (res < a || res < b) {
-      res = overflow_value;
-   }
-   return res;
-}

 #endif /* DRAW_PRIVATE_H */
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -345,8 +345,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
 /** Helper code for below */
 #define PRIM_RESTART_LOOP(elements) \
   do { \
-      for (j = 0; j < count; j++) {               \
-         i = draw_overflow_uadd(start, j, MAX_LOOP_IDX);  \
+      for (i = start; i < end; i++) { \
         if (i < elt_max && elements[i] == info->restart_index) { \
            if (cur_count > 0) { \
               /* draw elts up to prev pos */ \
@@ -378,11 +377,9 @@ draw_pt_arrays_restart(struct draw_context *draw,
   const unsigned prim = info->mode;
   const unsigned start = info->start;
   const unsigned count = info->count;
+   const unsigned end = start + count;
   const unsigned elt_max = draw->pt.user.eltMax;
-   unsigned i, j, cur_start, cur_count;
-   /* The largest index within a loop using the i variable as the index.
-    * Used for overflow detection */
-   const unsigned MAX_LOOP_IDX = 0xffffffff;
+   unsigned i, cur_start, cur_count;

   assert(info->primitive_restart);

@@ -459,14 +456,8 @@ draw_vbo(struct draw_context *draw,
   unsigned instance;
   unsigned index_limit;
   unsigned count;
-   unsigned fpstate = util_fpstate_get();
   struct pipe_draw_info resolved_info;

-   /* Make sure that denorms are treated like zeros. This is 
-    * the behavior required by D3D10. OpenGL doesn't care.
-    */
-   util_fpstate_set_denorms_to_zero(fpstate);
-
   resolve_draw_info(info, &resolved_info);
   info = &resolved_info;

@@ -517,16 +508,11 @@ draw_vbo(struct draw_context *draw,
                                     draw->pt.vertex_element,
                                     draw->pt.nr_vertex_elements,
                                     info);
-#if HAVE_LLVM
-   if (!draw->llvm)
-#endif
-   {
-      if (index_limit == 0) {
+
+   if (index_limit == 0) {
      /* one of the buffers is too small to do any valid drawing */
-         debug_warning("draw: VBO too small to draw anything\n");
-         util_fpstate_set(fpstate);
-         return;
-      }
+      debug_warning("draw: VBO too small to draw anything\n");
+      return;
   }

   /* If we're collecting stats then make sure we start from scratch */
@@ -543,13 +529,6 @@ draw_vbo(struct draw_context *draw,

   for (instance = 0; instance < info->instance_count; instance++) {
      draw->instance_id = instance + info->start_instance;
-      draw->start_instance = info->start_instance;
-      /* check for overflow */
-      if (draw->instance_id < instance ||
-          draw->instance_id < info->start_instance) {
-         /* if we overflown just set the instance id to the max */
-         draw->instance_id = 0xffffffff;
-      }

      draw_new_instance(draw);

@@ -565,5 +544,4 @@ draw_vbo(struct draw_context *draw,
   if (draw->collect_statistics) {
      draw->render->pipeline_statistics(draw->render, &draw->statistics);
   }
-   util_fpstate_set(fpstate);
 }
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -171,7 +171,6 @@ draw_pt_emit(struct pt_emit *emit,
   translate->run(translate,
 		  0,
 		  vertex_count,
-                  draw->start_instance,
                  draw->instance_id,
 		  hw_verts );

@@ -235,7 +234,6 @@ draw_pt_emit_linear(struct pt_emit *emit,
   translate->run(translate,
                  0,
                  count,
-                  draw->start_instance,
                  draw->instance_id,
                  hw_verts);

@@ -255,6 +253,12 @@ draw_pt_emit_linear(struct pt_emit *emit,
        i < prim_info->primitive_count;
        start += prim_info->primitive_lengths[i], i++)
   {
+      if (draw->collect_statistics) {
+         draw->statistics.c_invocations +=
+            u_decomposed_prims_for_vertices(prim_info->prim,
+                                            prim_info->primitive_lengths[i]);
+      }
+
      render->draw_arrays(render,
                          start,
                          prim_info->primitive_lengths[i]);
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -168,7 +168,6 @@ draw_pt_fetch_run(struct pt_fetch *fetch,
   translate->run_elts( translate,
 			elts,
 			count,
-                        draw->start_instance,
                        draw->instance_id,
 			verts );
 }
@@ -196,7 +195,6 @@ draw_pt_fetch_run_linear(struct pt_fetch *fetch,
   translate->run( translate,
                   start,
                   count,
-                   draw->start_instance,
                   draw->instance_id,
                   verts );
 }
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c
@@ -210,7 +210,6 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle,
   feme->translate->run_elts( feme->translate,
 			      fetch_elts,
 			      fetch_count,
-                              draw->start_instance,
                              draw->instance_id,
 			      hw_verts );

@@ -268,7 +267,6 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle,
   feme->translate->run( feme->translate,
                         start,
                         count,
-                         draw->start_instance,
                         draw->instance_id,
                         hw_verts );

@@ -328,7 +326,6 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle,
   feme->translate->run( feme->translate,
                         start,
                         count,
-                         draw->start_instance,
                         draw->instance_id,
                         hw_verts );

--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -182,29 +182,12 @@ static void so_emit_prim(struct pt_so_emit *so,

         buffer = (float *)((char *)draw->so.targets[ob]->mapping +
                            draw->so.targets[ob]->target.buffer_offset +
-                            draw->so.targets[ob]->internal_offset) +
-            state->output[slot].dst_offset;
+                            draw->so.targets[ob]->internal_offset) + state->output[slot].dst_offset;
         
         if (idx == so->pos_idx && pcp_ptr)
-            memcpy(buffer, &pre_clip_pos[start_comp],
-                   num_comps * sizeof(float));
+            memcpy(buffer, &pre_clip_pos[start_comp], num_comps * sizeof(float));
         else
-            memcpy(buffer, &input[idx][start_comp],
-                   num_comps * sizeof(float));
-#if 0
-         {
-            int j;
-            debug_printf("VERT[%d], offset = %d, slot[%d] sc = %d, num_c = %d, idx = %d = [",
-                         i + draw->so.targets[ob]->emitted_vertices,
-                         draw->so.targets[ob]->internal_offset,
-                         slot, start_comp, num_comps, idx);
-            for (j = 0; j < num_comps; ++j) {
-               unsigned *ubuffer = (unsigned*)buffer;
-               debug_printf("%d (0x%x), ", ubuffer[j], ubuffer[j]);
-            }
-            debug_printf("]\n");
-         }
-#endif
+            memcpy(buffer, &input[idx][start_comp], num_comps * sizeof(float));
      }
      for (ob = 0; ob < draw->so.num_targets; ++ob) {
         struct draw_so_target *target = draw->so.targets[ob];
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -33,9 +33,6 @@
 #define SEGMENT_SIZE 1024
 #define MAP_SIZE     256

-/* The largest possible index withing an index buffer */
-#define MAX_ELT_IDX 0xffffffff
-
 struct vsplit_frontend {
   struct draw_pt_front_end base;
   struct draw_context *draw;
@@ -85,15 +82,16 @@ vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
 * Add a fetch element and add it to the draw elements.
 */
 static INLINE void
-vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias)
+vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch)
 {
+   struct draw_context *draw = vsplit->draw;
   unsigned hash;

+   fetch = MIN2(fetch, draw->pt.max_index);
+
   hash = fetch % MAP_SIZE;

-   /* If the value isn't in the cache of it's an overflow due to the
-    * element bias */
-   if (vsplit->cache.fetches[hash] != fetch || ofbias) {
+   if (vsplit->cache.fetches[hash] != fetch) {
      /* update cache */
      vsplit->cache.fetches[hash] = fetch;
      vsplit->cache.draws[hash] = vsplit->cache.num_fetch_elts;
@@ -106,109 +104,22 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias
   vsplit->draw_elts[vsplit->cache.num_draw_elts++] = vsplit->cache.draws[hash];
 }

-/**
- * Returns the base index to the elements array.
- * The value is checked for overflows (both integer overflows
- * and the elements array overflow).
- */
-static INLINE unsigned
-vsplit_get_base_idx(struct vsplit_frontend *vsplit,
-                    unsigned start, unsigned fetch, unsigned *ofbit)
-{
-   struct draw_context *draw = vsplit->draw;
-   unsigned elt_idx = draw_overflow_uadd(start, fetch, MAX_ELT_IDX);
-   if (ofbit)
-      *ofbit = 0;
-
-   /* Overflown indices need to wrap to the first element
-    * in the index buffer */
-   if (elt_idx >= draw->pt.user.eltMax) {
-      if (ofbit)
-         *ofbit = 1;
-      elt_idx = 0;
-   }
-
-   return elt_idx;
-}
-
-/**
- * Returns the element index adjust for the element bias.
- * The final element index is created from the actual element
- * index, plus the element bias, clamped to maximum elememt
- * index if that addition overflows.
- */
-static INLINE unsigned
-vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
-                    int idx, int bias, unsigned *ofbias)
-{
-   int res = idx + bias;
-
-   if (ofbias)
-      *ofbias = 0;
-
-   if (idx > 0 && bias > 0) {
-      if (res < idx || res < bias) {
-         res = DRAW_MAX_FETCH_IDX;
-         if (ofbias)
-            *ofbias = 1;
-      }
-   } else if (idx < 0 && bias < 0) {
-      if (res > idx || res > bias) {
-         res = DRAW_MAX_FETCH_IDX;
-         if (ofbias)
-            *ofbias = 1;
-      }
-   }
-
-   return res;
-}
-
-#define VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias)    \
-   unsigned elt_idx;                                       \
-   unsigned ofbit;                                         \
-   unsigned ofbias;                                        \
-   elt_idx = vsplit_get_base_idx(vsplit, start, fetch, &ofbit);          \
-   elt_idx = vsplit_get_bias_idx(vsplit, ofbit ? 0 : DRAW_GET_IDX(elts, elt_idx), elt_bias, &ofbias)
-
-static INLINE void
-vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
-                       unsigned start, unsigned fetch, int elt_bias)
-{
-   struct draw_context *draw = vsplit->draw;
-   VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
-   vsplit_add_cache(vsplit, elt_idx, ofbias);
-}
-
-static INLINE void
-vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
-                       unsigned start, unsigned fetch, int elt_bias)
-{
-   struct draw_context *draw = vsplit->draw;
-   VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
-   vsplit_add_cache(vsplit, elt_idx, ofbias);
-}
-

 /**
 * Add a fetch element and add it to the draw elements.  The fetch element is
 * in full range (uint).
 */
 static INLINE void
-vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts,
-                      unsigned start, unsigned fetch, int elt_bias)
+vsplit_add_cache_uint(struct vsplit_frontend *vsplit, unsigned fetch)
 {
-   struct draw_context *draw = vsplit->draw;
-   unsigned raw_elem_idx = start + fetch + elt_bias;
-   VSPLIT_CREATE_IDX(elts, start, fetch, elt_bias);
-
-   /* special care for DRAW_MAX_FETCH_IDX */
-   if (raw_elem_idx == DRAW_MAX_FETCH_IDX && !vsplit->cache.has_max_fetch) {
+   /* special care for 0xffffffff */
+   if (fetch == 0xffffffff && !vsplit->cache.has_max_fetch) {
      unsigned hash = fetch % MAP_SIZE;
-      vsplit->cache.fetches[hash] = raw_elem_idx - 1; /* force update */
+      vsplit->cache.fetches[hash] = fetch - 1; /* force update */
      vsplit->cache.has_max_fetch = TRUE;
   }

-   vsplit_add_cache(vsplit, elt_idx, ofbias);
+   vsplit_add_cache(vsplit, fetch);
 }


@@ -217,17 +128,17 @@ vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts,

 #define FUNC vsplit_run_ubyte
 #define ELT_TYPE ubyte
-#define ADD_CACHE(vsplit, ib, start, fetch, bias) vsplit_add_cache_ubyte(vsplit,ib,start,fetch,bias)
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
 #include "draw_pt_vsplit_tmp.h"

 #define FUNC vsplit_run_ushort
 #define ELT_TYPE ushort
-#define ADD_CACHE(vsplit, ib, start, fetch, bias) vsplit_add_cache_ushort(vsplit,ib,start,fetch, bias)
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache(vsplit, fetch)
 #include "draw_pt_vsplit_tmp.h"

 #define FUNC vsplit_run_uint
 #define ELT_TYPE uint
-#define ADD_CACHE(vsplit, ib, start, fetch, bias) vsplit_add_cache_uint(vsplit, ib, start, fetch, bias)
+#define ADD_CACHE(vsplit, fetch) vsplit_add_cache_uint(vsplit, fetch)
 #include "draw_pt_vsplit_tmp.h"


--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -47,20 +47,13 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
   const unsigned start = istart;
   const unsigned end = istart + icount;

-   /* If the index buffer overflows we'll need to run
-    * through the normal paths */
-   if (start >= draw->pt.user.eltMax ||
-       end > draw->pt.user.eltMax ||
-       end < istart || end < icount)
-      return FALSE;
-
   /* use the ib directly */
   if (min_index == 0 && sizeof(ib[0]) == sizeof(draw_elts[0])) {
      if (icount > vsplit->max_vertices)
         return FALSE;

-      for (i = 0; i < icount; i++) {
-         ELT_TYPE idx = DRAW_GET_IDX(ib, start + i);
+      for (i = start; i < end; i++) {
+         ELT_TYPE idx = DRAW_GET_IDX(ib, i);
         if (idx < min_index || idx > max_index) {
            debug_printf("warning: index out of range\n");
         }
@@ -89,29 +82,25 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
   fetch_start = min_index + elt_bias;
   fetch_count = max_index - min_index + 1;

-   /* Check for overflow in the fetch_start */
-   if (fetch_start < min_index || fetch_start < elt_bias)
-      return FALSE;
-
   if (!draw_elts) {
      if (min_index == 0) {
-         for (i = 0; i < icount; i++) {
-            ELT_TYPE idx = DRAW_GET_IDX(ib, i + start);
+         for (i = start; i < end; i++) {
+            ELT_TYPE idx = DRAW_GET_IDX(ib, i);

            if (idx < min_index || idx > max_index) {
               debug_printf("warning: index out of range\n");
            }
-            vsplit->draw_elts[i] = (ushort) idx;
+            vsplit->draw_elts[i - start] = (ushort) idx;
         }
      }
      else {
-         for (i = 0; i < icount; i++) {
-            ELT_TYPE idx = DRAW_GET_IDX(ib, i + start);
+         for (i = start; i < end; i++) {
+            ELT_TYPE idx = DRAW_GET_IDX(ib, i);

            if (idx < min_index || idx > max_index) {
               debug_printf("warning: index out of range\n");
            }
-            vsplit->draw_elts[i] = (ushort) (idx - min_index);
+            vsplit->draw_elts[i - start] = (ushort) (idx - min_index);
         }
      }

@@ -148,36 +137,41 @@ CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
   spoken = !!spoken;
   if (ibias == 0) {
      if (spoken)
-         ADD_CACHE(vsplit, ib, 0, ispoken, 0);
+         ADD_CACHE(vsplit, DRAW_GET_IDX(ib, ispoken));

-      for (i = spoken; i < icount; i++) {
-         ADD_CACHE(vsplit, ib, istart, i, 0);
-      }
+      for (i = spoken; i < icount; i++)
+         ADD_CACHE(vsplit, DRAW_GET_IDX(ib, istart + i));

      if (close)
-         ADD_CACHE(vsplit, ib, 0, iclose, 0);
+         ADD_CACHE(vsplit, DRAW_GET_IDX(ib, iclose));
   }
   else if (ibias > 0) {
      if (spoken)
-         ADD_CACHE(vsplit, ib, 0, ispoken, ibias);
+         ADD_CACHE(vsplit, (uint) DRAW_GET_IDX(ib, ispoken) + ibias);

      for (i = spoken; i < icount; i++)
-         ADD_CACHE(vsplit, ib, istart, i, ibias);
+         ADD_CACHE(vsplit, (uint) DRAW_GET_IDX(ib, istart + i) + ibias);

      if (close)
-         ADD_CACHE(vsplit, ib, 0, iclose, ibias);
+         ADD_CACHE(vsplit, (uint) DRAW_GET_IDX(ib, iclose) + ibias);
   }
   else {
      if (spoken) {
-         ADD_CACHE(vsplit, ib, 0, ispoken, ibias);
+         if ((int) DRAW_GET_IDX(ib, ispoken) < -ibias) /* same read as the loop/close guards */
+            return;
+         ADD_CACHE(vsplit, DRAW_GET_IDX(ib, ispoken) + ibias);
      }

      for (i = spoken; i < icount; i++) {
-         ADD_CACHE(vsplit, ib, istart, i, ibias);
+         if ((int) DRAW_GET_IDX(ib, istart + i) < -ibias)
+            return;
+         ADD_CACHE(vsplit, DRAW_GET_IDX(ib, istart + i) + ibias);
      }

      if (close) {
-         ADD_CACHE(vsplit, ib, 0, iclose, ibias);
+         if ((int) DRAW_GET_IDX(ib, iclose) < -ibias)
+            return;
+         ADD_CACHE(vsplit, DRAW_GET_IDX(ib, iclose) + ibias);
      }
   }

--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -86,12 +86,12 @@ draw_create_vertex_shader(struct draw_context *draw,
            found_clipvertex = TRUE;
            vs->clipvertex_output = i;
         } else if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
-            debug_assert(vs->info.output_semantic_index[i] <
-                         PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
-            vs->clipdistance_output[vs->info.output_semantic_index[i]] = i;
+            if (vs->info.output_semantic_index[i] == 0)
+               vs->clipdistance_output[0] = i;
+            else
+               vs->clipdistance_output[1] = i;
         } else if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_CULLDIST) {
-            debug_assert(vs->info.output_semantic_index[i] <
-                         PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
+            debug_assert(vs->info.output_semantic_index[i] < Elements(vs->culldistance_output));
            vs->culldistance_output[vs->info.output_semantic_index[i]] = i;
         }
      }
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -112,8 +112,8 @@ struct draw_vertex_shader {
   unsigned position_output;
   unsigned edgeflag_output;
   unsigned clipvertex_output;
-   unsigned clipdistance_output[PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT];
-   unsigned culldistance_output[PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT];
+   unsigned clipdistance_output[2];
+   unsigned culldistance_output[2];
   /* Extracted from shader:
    */
   const float (*immediates)[4];
--- a/src/gallium/auxiliary/draw/draw_vs_variant.c
+++ b/src/gallium/auxiliary/draw/draw_vs_variant.c
@@ -168,7 +168,6 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant,
   vsvg->fetch->run_elts( vsvg->fetch, 
                          elts,
                          count,
-                          vsvg->draw->start_instance,
                          vsvg->draw->instance_id,
                          temp_buffer );

@@ -212,7 +211,6 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant,

   vsvg->emit->run( vsvg->emit,
                    0, count,
-                    vsvg->draw->start_instance,
                    vsvg->draw->instance_id,
                    output_buffer );

@@ -236,7 +234,6 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant,
   vsvg->fetch->run( vsvg->fetch, 
                     start,
                     count,
-                     vsvg->draw->start_instance,
                     vsvg->draw->instance_id,
                     temp_buffer );

@@ -277,7 +274,6 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant,
   
   vsvg->emit->run( vsvg->emit,
                    0, count,
-                    vsvg->draw->start_instance,
                    vsvg->draw->instance_id,
                    output_buffer );

--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -62,7 +62,6 @@
 #include "lp_bld_debug.h"
 #include "lp_bld_bitarit.h"
 #include "lp_bld_arit.h"
-#include "lp_bld_flow.h"


 #define EXP_POLY_DEGREE 5
@@ -2306,14 +2305,19 @@ lp_build_rsqrt(struct lp_build_context *bld,
   /*
    * This should be faster but all denormals will end up as infinity.
    */
-   if (0 && lp_build_fast_rsqrt_available(type)) {
+   if (0 && ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
+        (util_cpu_caps.has_avx && type.width == 32 && type.length == 8))) {
      const unsigned num_iterations = 1;
      LLVMValueRef res;
      unsigned i;
+      const char *intrinsic = NULL;

-      /* rsqrt(1.0) != 1.0 here */
-      res = lp_build_fast_rsqrt(bld, a);
-
+      if (type.length == 4) {
+         intrinsic = "llvm.x86.sse.rsqrt.ps";
+      }
+      else {
+         intrinsic = "llvm.x86.avx.rsqrt.ps.256";
+      }
      if (num_iterations) {
         /*
          * Newton-Raphson will result in NaN instead of infinity for zero,
@@ -2333,6 +2337,8 @@ lp_build_rsqrt(struct lp_build_context *bld,

         inf = LLVMBuildBitCast(builder, inf, lp_build_vec_type(bld->gallivm, type), "");

+         res = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+
         for (i = 0; i < num_iterations; ++i) {
            res = lp_build_rsqrt_refine(bld, a, res);
         }
@@ -2343,6 +2349,11 @@ lp_build_rsqrt(struct lp_build_context *bld,
         cmp = lp_build_compare(bld->gallivm, type, PIPE_FUNC_EQUAL, a, bld->one);
         res = lp_build_select(bld, cmp, bld->one, res);
      }
+      else {
+         /* rsqrt(1.0) != 1.0 here */
+         res = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+
+      }

      return res;
   }
@@ -2350,58 +2361,6 @@ lp_build_rsqrt(struct lp_build_context *bld,
   return lp_build_rcp(bld, lp_build_sqrt(bld, a));
 }

-/**
- * If there's a fast (inaccurate) rsqrt instruction available
- * (caller may want to avoid to call rsqrt_fast if it's not available,
- * i.e. for calculating x^0.5 it may do rsqrt_fast(x) * x but if
- * unavailable it would result in sqrt/div/mul so obviously
- * much better to just call sqrt, skipping both div and mul).
- */
-boolean
-lp_build_fast_rsqrt_available(struct lp_type type)
-{
-   assert(type.floating);
-
-   if ((util_cpu_caps.has_sse && type.width == 32 && type.length == 4) ||
-       (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) {
-      return true;
-   }
-   return false;
-}
-
-
-/**
- * Generate 1/sqrt(a).
- * Result is undefined for values < 0, infinity for +0.
- * Precision is limited, only ~10 bits guaranteed
- * (rsqrt 1.0 may not be 1.0, denorms may be flushed to 0).
- */
-LLVMValueRef
-lp_build_fast_rsqrt(struct lp_build_context *bld,
-                    LLVMValueRef a)
-{
-   LLVMBuilderRef builder = bld->gallivm->builder;
-   const struct lp_type type = bld->type;
-
-   assert(lp_check_value(type, a));
-
-   if (lp_build_fast_rsqrt_available(type)) {
-      const char *intrinsic = NULL;
-
-      if (type.length == 4) {
-         intrinsic = "llvm.x86.sse.rsqrt.ps";
-      }
-      else {
-         intrinsic = "llvm.x86.avx.rsqrt.ps.256";
-      }
-      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
-   }
-   else {
-      debug_printf("%s: emulating fast rsqrt with rcp/sqrt\n", __FUNCTION__);
-   }
-   return lp_build_rcp(bld, lp_build_sqrt(bld, a));
-}
-

 /**
 * Generate sin(a) using SSE2
@@ -2602,14 +2561,15 @@ lp_build_sin(struct lp_build_context *bld,
    * xmm3 = poly_mask;
    * y2 = _mm_and_ps(xmm3, y2); //, xmm3);
    * y = _mm_andnot_ps(xmm3, y);
-    * y = _mm_or_ps(y,y2);
+    * y = _mm_add_ps(y,y2);
    */
   LLVMValueRef y2_i = LLVMBuildBitCast(b, y2_9, bld->int_vec_type, "y2_i");
   LLVMValueRef y_i = LLVMBuildBitCast(b, y_10, bld->int_vec_type, "y_i");
   LLVMValueRef y2_and = LLVMBuildAnd(b, y2_i, poly_mask, "y2_and");
-   LLVMValueRef poly_mask_inv = LLVMBuildNot(b, poly_mask, "poly_mask_inv");
+   LLVMValueRef inv = lp_build_const_int_vec(gallivm, bld->type, ~0);
+   LLVMValueRef poly_mask_inv = LLVMBuildXor(b, poly_mask, inv, "poly_mask_inv");
   LLVMValueRef y_and = LLVMBuildAnd(b, y_i, poly_mask_inv, "y_and");
-   LLVMValueRef y_combine = LLVMBuildOr(b, y_and, y2_and, "y_combine");
+   LLVMValueRef y_combine = LLVMBuildAdd(b, y_and, y2_and, "y_combine");

   /*
    * update the sign
@@ -2819,14 +2779,14 @@ lp_build_cos(struct lp_build_context *bld,
    * xmm3 = poly_mask;
    * y2 = _mm_and_ps(xmm3, y2); //, xmm3);
    * y = _mm_andnot_ps(xmm3, y);
-    * y = _mm_or_ps(y,y2);
+    * y = _mm_add_ps(y,y2);
    */
   LLVMValueRef y2_i = LLVMBuildBitCast(b, y2_9, bld->int_vec_type, "y2_i");
   LLVMValueRef y_i = LLVMBuildBitCast(b, y_10, bld->int_vec_type, "y_i");
   LLVMValueRef y2_and = LLVMBuildAnd(b, y2_i, poly_mask, "y2_and");
-   LLVMValueRef poly_mask_inv = LLVMBuildNot(b, poly_mask, "poly_mask_inv");
+   LLVMValueRef poly_mask_inv = LLVMBuildXor(b, poly_mask, inv, "poly_mask_inv");
   LLVMValueRef y_and = LLVMBuildAnd(b, y_i, poly_mask_inv, "y_and");
-   LLVMValueRef y_combine = LLVMBuildOr(b, y_and, y2_and, "y_combine");
+   LLVMValueRef y_combine = LLVMBuildAdd(b, y_and, y2_and, "y_combine");

   /*
    * update the sign
@@ -2895,7 +2855,7 @@ lp_build_log(struct lp_build_context *bld,
 * Generate polynomial.
 * Ex:  coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
 */
-LLVMValueRef
+static LLVMValueRef
 lp_build_polynomial(struct lp_build_context *bld,
                    LLVMValueRef x,
                    const double *coeffs,
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -231,19 +231,6 @@ LLVMValueRef
 lp_build_rsqrt(struct lp_build_context *bld,
               LLVMValueRef a);

-boolean
-lp_build_fast_rsqrt_available(struct lp_type type);
-
-LLVMValueRef
-lp_build_fast_rsqrt(struct lp_build_context *bld,
-                    LLVMValueRef a);
-
-LLVMValueRef
-lp_build_polynomial(struct lp_build_context *bld,
-                    LLVMValueRef x,
-                    const double *coeffs,
-                    unsigned num_coeffs);
-
 LLVMValueRef
 lp_build_cos(struct lp_build_context *bld,
             LLVMValueRef a);
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit_overflow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit_overflow.c
@@ -1,151 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2013
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Helper
- *
- * The functions in this file implement arthmetic operations with support
- * for overflow detection and reporting.
- *
- */
-
-#include "lp_bld_arit_overflow.h"
-
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_init.h"
-#include "lp_bld_intr.h"
-#include "lp_bld_logic.h"
-#include "lp_bld_pack.h"
-#include "lp_bld_debug.h"
-#include "lp_bld_bitarit.h"
-
-#include "util/u_memory.h"
-#include "util/u_debug.h"
-#include "util/u_math.h"
-#include "util/u_string.h"
-#include "util/u_cpu_detect.h"
-
-#include <float.h>
-
-
-static LLVMValueRef
-build_binary_int_overflow(struct gallivm_state *gallivm,
-                          const char *intr_prefix,
-                          LLVMValueRef a,
-                          LLVMValueRef b,
-                          LLVMValueRef *ofbit)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-   char intr_str[256];
-   LLVMTypeRef type_ref;
-   LLVMTypeKind type_kind;
-   unsigned type_width;
-   LLVMTypeRef oelems[2];
-   LLVMValueRef oresult;
-   LLVMTypeRef otype;
-
-   debug_assert(LLVMTypeOf(a) == LLVMTypeOf(b));
-   type_ref = LLVMTypeOf(a);
-   type_kind = LLVMGetTypeKind(type_ref);
-
-   debug_assert(type_kind == LLVMIntegerTypeKind);
-   type_width = LLVMGetIntTypeWidth(type_ref);
-
-   debug_assert(type_width == 16 || type_width == 32 || type_width == 64);
-
-   util_snprintf(intr_str, sizeof intr_str, "%s.i%u",
-                 intr_prefix, type_width);
-
-   oelems[0] = type_ref;
-   oelems[1] = LLVMInt1TypeInContext(gallivm->context);
-
-   otype = LLVMStructTypeInContext(gallivm->context, oelems, 2, FALSE);
-   oresult = lp_build_intrinsic_binary(builder, intr_str,
-                                       otype, a, b);
-   if (ofbit) {
-      if (*ofbit) {
-         *ofbit = LLVMBuildOr(
-            builder, *ofbit,
-            LLVMBuildExtractValue(builder, oresult, 1, ""), "");
-      } else {
-         *ofbit = LLVMBuildExtractValue(builder, oresult, 1, "");
-      }
-   }
-
-   return LLVMBuildExtractValue(builder, oresult, 0, "");
-}
-
-/**
- * Performs unsigned addition of two integers and reports 
- * overflow if detected.
- *
- * The values @a and @b must be of the same integer type. If
- * an overflow is detected the IN/OUT @ofbit parameter is used:
- * - if it's pointing to a null value, the overflow bit is simply
- *   stored inside the variable it's pointing to,
- * - if it's pointing to a valid value, then that variable,
- *   which must be of i1 type, is ORed with the newly detected
- *   overflow bit. This is done to allow chaining of a number of
- *   overflow functions together without having to test the 
- *   overflow bit after every single one.
- */
-LLVMValueRef
-lp_build_uadd_overflow(struct gallivm_state *gallivm,
-                       LLVMValueRef a,
-                       LLVMValueRef b,
-                       LLVMValueRef *ofbit)
-{
-   return build_binary_int_overflow(gallivm, "llvm.uadd.with.overflow",
-                                    a, b, ofbit);
-}
-
-/**
- * Performs unsigned multiplication of  two integers and 
- * reports overflow if detected.
- *
- * The values @a and @b must be of the same integer type. If
- * an overflow is detected the IN/OUT @ofbit parameter is used:
- * - if it's pointing to a null value, the overflow bit is simply
- *   stored inside the variable it's pointing to,
- * - if it's pointing to a valid value, then that variable,
- *   which must be of i1 type, is ORed with the newly detected
- *   overflow bit. This is done to allow chaining of a number of
- *   overflow functions together without having to test the 
- *   overflow bit after every single one.
- */
-LLVMValueRef
-lp_build_umul_overflow(struct gallivm_state *gallivm,
-                       LLVMValueRef a,
-                       LLVMValueRef b,
-                       LLVMValueRef *ofbit)
-{
-   return build_binary_int_overflow(gallivm, "llvm.umul.with.overflow",
-                                    a, b, ofbit);
-}
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit_overflow.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit_overflow.h
@@ -1,57 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2013 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * @file
- * Helper arithmetic functions with support for overflow detection
- * and reporting.
- *
- * @author Zack Rusin <zackr@vmware.com>
- */
-
-
-#ifndef LP_BLD_ARIT_OVERFLOW_H
-#define LP_BLD_ARIT_OVERFLOW_H
-
-
-#include "gallivm/lp_bld.h"
-
-struct gallivm_state;
-
-LLVMValueRef
-lp_build_uadd_overflow(struct gallivm_state *gallivm,
-                       LLVMValueRef a,
-                       LLVMValueRef b,
-                       LLVMValueRef *ofbit);
-
-LLVMValueRef
-lp_build_umul_overflow(struct gallivm_state *gallivm,
-                       LLVMValueRef a,
-                       LLVMValueRef b,
-                       LLVMValueRef *ofbit);
-
-#endif /* !LP_BLD_ARIT_OVERFLOW_H */
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -79,6 +79,82 @@



+/**
+ * Byte swap on element. It will construct a call to intrinsic llvm.bswap
+ * based on the type.
+ *
+ * @param res           element to byte swap.
+ * @param type          int16_t, int32_t, int64_t, float or double
+ * @return              the byte-swapped element (8-bit elements are returned as-is)
+ */
+LLVMValueRef
+lp_build_bswap(struct gallivm_state *gallivm,
+               LLVMValueRef res,
+               struct lp_type type)
+{
+   LLVMTypeRef int_type = LLVMIntTypeInContext(gallivm->context,
+                                               type.width);
+   const char *intrinsic = NULL;
+   if (type.width == 8)
+      return res;
+   if (type.width == 16)
+      intrinsic = "llvm.bswap.i16";
+   else if (type.width == 32)
+     intrinsic = "llvm.bswap.i32";
+   else if (type.width == 64)
+      intrinsic = "llvm.bswap.i64";
+
+   assert (intrinsic != NULL);
+
+   /* In case of a floating-point type cast to a int of same size and then
+    * cast back to fp type.
+    */
+   if (type.floating)
+      res = LLVMBuildBitCast(gallivm->builder, res, int_type, "");
+   res = lp_build_intrinsic_unary(gallivm->builder, intrinsic, int_type, res);
+   if (type.floating)
+      res = LLVMBuildBitCast(gallivm->builder, res,
+                             lp_build_elem_type(gallivm, type), "");
+   return res;
+}
+
+
+/**
+ * Byte swap every element in the vector.
+ *
+ * @param packed        <vector> to convert
+ * @param src_type_vec  <vector> type of int16_t, int32_t, int64_t, float or
+ *                      double
+ * @param dst_type_vec  <vector> type to return
+ */
+LLVMValueRef
+lp_build_bswap_vec(struct gallivm_state *gallivm,
+                   LLVMValueRef packed,
+                   struct lp_type src_type_vec,
+                   struct lp_type dst_type_vec)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef dst_type = lp_build_elem_type(gallivm, dst_type_vec);
+   LLVMValueRef res;
+
+   if (src_type_vec.length == 1) {
+      res = lp_build_bswap(gallivm, packed, src_type_vec);
+      res = LLVMBuildBitCast(gallivm->builder, res, dst_type, "");
+   } else {
+      unsigned i;
+      res = LLVMGetUndef(lp_build_vec_type(gallivm, dst_type_vec));
+      for (i = 0; i < src_type_vec.length; ++i) {
+         LLVMValueRef index = lp_build_const_int32(gallivm, i);
+         LLVMValueRef elem = LLVMBuildExtractElement(builder, packed, index, "");
+         elem = lp_build_bswap(gallivm, elem, src_type_vec);
+         elem = LLVMBuildBitCast(gallivm->builder, elem, dst_type, "");
+         res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
+      }
+   }
+   return res;
+}
+
+
 /**
 * Converts int16 half-float to float32
 * Note this can be performed in 1 instruction if vcvtph2ps exists (f16c/cvt16)
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
@@ -42,6 +42,17 @@

 struct lp_type;

+LLVMValueRef
+lp_build_bswap(struct gallivm_state *gallivm,
+               LLVMValueRef res,
+               struct lp_type type);
+
+LLVMValueRef
+lp_build_bswap_vec(struct gallivm_state *gallivm,
+                   LLVMValueRef packed,
+                   struct lp_type src_type,
+                   struct lp_type dst_type);
+
 LLVMValueRef
 lp_build_half_to_float(struct gallivm_state *gallivm,
                       LLVMValueRef src);
--- a/src/gallium/auxiliary/gallivm/lp_bld_flow.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_flow.c
@@ -188,7 +188,7 @@ lp_build_mask_value(struct lp_build_mask_context *mask)
 /**
 * Update boolean mask with given value (bitwise AND).
 * Typically used to update the quad's pixel alive/killed mask
- * after depth testing, alpha testing, TGSI_OPCODE_KILL_IF, etc.
+ * after depth testing, alpha testing, TGSI_OPCODE_KIL, etc.
 */
 void
 lp_build_mask_update(struct lp_build_mask_context *mask,
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -158,16 +158,4 @@ lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
                         LLVMValueRef src,
                         LLVMValueRef *dst);

-LLVMValueRef
-lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
-                              const struct util_format_description *dst_fmt,
-                              struct lp_type src_type,
-                              LLVMValueRef *src);
-
-LLVMValueRef
-lp_build_srgb_to_linear(struct gallivm_state *gallivm,
-                        struct lp_type src_type,
-                        LLVMValueRef src);
-
-
 #endif /* !LP_BLD_FORMAT_H */
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -139,12 +139,12 @@ format_matches_type(const struct util_format_description *desc,


 /**
- * Unpack a single pixel into its XYZW components.
+ * Unpack a single pixel into its RGBA components.
 *
 * @param desc  the pixel format for the packed pixel value
 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM
 *
- * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
+ * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector.
 */
 static INLINE LLVMValueRef
 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
@@ -159,6 +159,7 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,

   boolean normalized;
   boolean needs_uitofp;
+   unsigned shift;
   unsigned i;

   /* TODO: Support more formats */
@@ -171,6 +172,10 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
    * matches floating point size */
   assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context));

+#ifdef PIPE_ARCH_BIG_ENDIAN
+   packed = lp_build_bswap(gallivm, packed, lp_type_uint(32));
+#endif
+
   /* Broadcast the packed value to all four channels
    * before: packed = BGRA
    * after: packed = {BGRA, BGRA, BGRA, BGRA}
@@ -189,11 +194,11 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
   /* Initialize vector constants */
   normalized = FALSE;
   needs_uitofp = FALSE;
+   shift = 0;

   /* Loop over 4 color components */
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;
-      unsigned shift = desc->channel[i].shift;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
@@ -219,10 +224,12 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
         else
            scales[i] =  lp_build_const_float(gallivm, 1.0);
      }
+
+      shift += bits;
   }

-   /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW}
-    * into masked = {X, Y, Z, W}
+   /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA}
+    * into masked = {B, G, R, A}
    */
   shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
   masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
@@ -269,6 +276,7 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
   LLVMValueRef shifts[4];
   LLVMValueRef scales[4];
   boolean normalized;
+   unsigned shift;
   unsigned i, j;

   assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
@@ -294,9 +302,9 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
                                       LLVMConstVector(swizzles, 4), "");

   normalized = FALSE;
+   shift = 0;
   for (i = 0; i < 4; ++i) {
      unsigned bits = desc->channel[i].size;
-      unsigned shift = desc->channel[i].shift;

      if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) {
         shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
@@ -317,6 +325,8 @@ lp_build_pack_rgba_aos(struct gallivm_state *gallivm,
         else
            scales[i] = lp_build_const_float(gallivm, 1.0);
      }
+
+      shift += bits;
   }

   if (normalized)
@@ -400,11 +410,16 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,

      packed = lp_build_gather(gallivm, type.length/4,
                               format_desc->block.bits, type.width*4,
-                               base_ptr, offset, TRUE);
+                               base_ptr, offset);

      assert(format_desc->block.bits <= vec_len);

      packed = LLVMBuildBitCast(gallivm->builder, packed, dst_vec_type, "");
+#ifdef PIPE_ARCH_BIG_ENDIAN
+      if (type.floating)
+         packed = lp_build_bswap_vec(gallivm, packed, type,
+                                    lp_type_float_vec(type.width, vec_len));
+#endif
      return lp_build_format_swizzle_aos(format_desc, &bld, packed);
   }

@@ -438,7 +453,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,

         packed = lp_build_gather_elem(gallivm, num_pixels,
                                       format_desc->block.bits, 32,
-                                       base_ptr, offset, k, FALSE);
+                                       base_ptr, offset, k);

         tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm,
                                                  format_desc,
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c
@@ -40,6 +40,58 @@
 #include "pipe/p_state.h"


+#ifdef PIPE_ARCH_BIG_ENDIAN
+static LLVMValueRef
+lp_build_read_int_bswap(struct gallivm_state *gallivm,
+                        LLVMValueRef base_ptr,
+                        unsigned src_width,
+                        LLVMTypeRef src_type,
+                        unsigned i,
+                        LLVMTypeRef dst_type)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef index = lp_build_const_int32(gallivm, i);
+   LLVMValueRef ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "");
+   LLVMValueRef res = LLVMBuildLoad(builder, ptr, "");
+   res = lp_build_bswap(gallivm, res, lp_type_uint(src_width));
+   return LLVMBuildBitCast(builder, res, dst_type, "");
+}
+
+static LLVMValueRef
+lp_build_fetch_read_big_endian(struct gallivm_state *gallivm,
+                               struct lp_type src_type,
+                               LLVMValueRef base_ptr)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned src_width = src_type.width;
+   unsigned length = src_type.length;
+   LLVMTypeRef src_elem_type = LLVMIntTypeInContext(gallivm->context, src_width);
+   LLVMTypeRef dst_elem_type = lp_build_elem_type (gallivm, src_type);
+   LLVMTypeRef src_ptr_type = LLVMPointerType(src_elem_type, 0);
+   LLVMValueRef res;
+
+   base_ptr = LLVMBuildPointerCast(builder, base_ptr, src_ptr_type, "");
+   if (length == 1) {
+      /* Scalar */
+      res = lp_build_read_int_bswap(gallivm, base_ptr, src_width, src_elem_type,
+                                    0, dst_elem_type);
+   } else {
+      /* Vector */
+      LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+      unsigned i;
+
+      res = LLVMGetUndef(dst_vec_type);
+      for (i = 0; i < length; ++i) {
+         LLVMValueRef index = lp_build_const_int32(gallivm, i);
+         LLVMValueRef elem = lp_build_read_int_bswap(gallivm, base_ptr, src_width,
+                                                     src_elem_type, i, dst_elem_type);
+         res = LLVMBuildInsertElement(builder, res, elem, index, "");
+      }
+   }
+
+   return res;
+}
+#endif

 /**
 * @brief lp_build_fetch_rgba_aos_array
@@ -72,9 +124,13 @@ lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,

   /* Read whole vector from memory, unaligned */
   ptr = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   res = lp_build_fetch_read_big_endian(gallivm, src_type, ptr);
+#else
   ptr = LLVMBuildPointerCast(builder, ptr, LLVMPointerType(src_vec_type, 0), "");
   res = LLVMBuildLoad(builder, ptr, "");
   lp_set_load_alignment(res, src_type.width / 8);
+#endif

   /* Truncate doubles to float */
   if (src_type.floating && src_type.width == 64) {
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -115,6 +115,7 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context bld;
   LLVMValueRef inputs[4];
+   unsigned start;
   unsigned chan;

   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
@@ -127,9 +128,9 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
   lp_build_context_init(&bld, gallivm, type);

   /* Decode the input vector components */
+   start = 0;
   for (chan = 0; chan < format_desc->nr_channels; ++chan) {
      const unsigned width = format_desc->channel[chan].size;
-      const unsigned start = format_desc->channel[chan].shift;
      const unsigned stop = start + width;
      LLVMValueRef input;

@@ -163,23 +164,11 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
          */

         if (type.floating) {
-            if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
-               assert(width == 8);
-               if (format_desc->swizzle[3] == chan) {
-                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
-               }
-               else {
-                  struct lp_type conv_type = lp_uint_type(type);
-                  input = lp_build_srgb_to_linear(gallivm, conv_type, input);
-               }
-            }
-            else {
-               if(format_desc->channel[chan].normalized)
-                  input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
-               else
-                  input = LLVMBuildSIToFP(builder, input,
-                                          lp_build_vec_type(gallivm, type), "");
-            }
+            if(format_desc->channel[chan].normalized)
+               input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
+            else
+               input = LLVMBuildSIToFP(builder, input,
+                                       lp_build_vec_type(gallivm, type), "");
         }
         else if (format_desc->channel[chan].pure_integer) {
            /* Nothing to do */
@@ -267,6 +256,8 @@ lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
      }

      inputs[chan] = input;
+
+      start = stop;
   }

   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
@@ -300,11 +291,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,

   /* Decode the input vector components */
   for (chan = 0; chan < 4; ++chan) {
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned start = chan*8;
-#else
-      unsigned start = (3-chan)*8;
-#endif
      unsigned stop = start + 8;
      LLVMValueRef input;

@@ -356,7 +343,6 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,

   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
-        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
       format_desc->block.width == 1 &&
       format_desc->block.height == 1 &&
@@ -374,14 +360,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,

      /*
       * gather the texels from the texture
-       * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
+       * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
       */
-      assert(format_desc->block.bits <= type.width);
      packed = lp_build_gather(gallivm,
                               type.length,
                               format_desc->block.bits,
                               type.width,
-                               base_ptr, offset, FALSE);
+                               base_ptr, offset);

      /*
       * convert texels to float rgba
@@ -406,8 +391,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,

      packed = lp_build_gather(gallivm, type.length,
                               format_desc->block.bits,
-                               type.width, base_ptr, offset,
-                               FALSE);
+                               type.width, base_ptr, offset);
      if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
      }
@@ -434,14 +418,14 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
         LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
         offset = LLVMBuildAdd(builder, offset, s_offset, "");
         packed = lp_build_gather(gallivm, type.length,
-                                  32, type.width, base_ptr, offset, FALSE);
+                                  32, type.width, base_ptr, offset);
         packed = LLVMBuildAnd(builder, packed,
                               lp_build_const_int_vec(gallivm, type, mask), "");
      }
      else {
         assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
         packed = lp_build_gather(gallivm, type.length,
-                                  32, type.width, base_ptr, offset, TRUE);
+                                  32, type.width, base_ptr, offset);
         packed = LLVMBuildBitCast(builder, packed,
                                   lp_build_vec_type(gallivm, type), "");
      }
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_srgb.c
@@ -1,344 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2013 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * @file
- * Format conversion code for srgb formats.
- *
- * Functions for converting from srgb to linear and vice versa.
- * From http://www.opengl.org/registry/specs/EXT/texture_sRGB.txt:
- *
- * srgb->linear:
- * cl = cs / 12.92,                 cs <= 0.04045
- * cl = ((cs + 0.055)/1.055)^2.4,   cs >  0.04045
- *
- * linear->srgb:
- * if (isnan(cl)) {
- *    Map IEEE-754 Not-a-number to zero.
- *    cs = 0.0;
- * } else if (cl > 1.0) {
- *    cs = 1.0;
- * } else if (cl < 0.0) {
- *    cs = 0.0;
- * } else if (cl < 0.0031308) {
- *    cs = 12.92 * cl;
- * } else {
- *    cs = 1.055 * pow(cl, 0.41666) - 0.055;
- * }
- *
- * This does not need to be accurate, however at least for d3d10
- * (http://msdn.microsoft.com/en-us/library/windows/desktop/dd607323%28v=vs.85%29.aspx):
- * 1) For srgb->linear, it is required that the error on the srgb side is
- *    not larger than 0.5f, which I interpret that if you map the value back
- *    to srgb from linear using the ideal conversion, it would not be off by
- *    more than 0.5f (that is, it would map to the same 8-bit integer value
- *    as it was before conversion to linear).
- * 2) linear->srgb is permitted 0.6f which luckily looks like quite a large
- *    error is allowed.
- * 3) Additionally, all srgb values converted to linear and back must result
- *    in the same value as they were originally.
- *
- * @author Roland Scheidegger <sroland@vmware.com>
- */
-
-
-#include "util/u_debug.h"
-
-#include "lp_bld_type.h"
-#include "lp_bld_const.h"
-#include "lp_bld_arit.h"
-#include "lp_bld_bitarit.h"
-#include "lp_bld_logic.h"
-#include "lp_bld_format.h"
-
-
-
-/**
- * Convert srgb int values to linear float values.
- * Several possibilities how to do this, e.g.
- * - table
- * - doing the pow() with int-to-float and float-to-int tricks
- *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
- * - just using standard polynomial approximation
- *   (3rd order polynomial is required for crappy but just sufficient accuracy)
- *
- * @param src   integer (vector) value(s) to convert
- *              (8 bit values unpacked to 32 bit already).
- */
-LLVMValueRef
-lp_build_srgb_to_linear(struct gallivm_state *gallivm,
-                        struct lp_type src_type,
-                        LLVMValueRef src)
-{
-   struct lp_type f32_type = lp_type_float_vec(32, src_type.length * 32);
-   struct lp_build_context f32_bld;
-   LLVMValueRef srcf, part_lin, part_pow, is_linear, lin_const, lin_thresh;
-   double coeffs[4] = {0.0023f,
-                       0.0030f / 255.0f,
-                       0.6935f / (255.0f * 255.0f),
-                       0.3012f / (255.0f * 255.0f * 255.0f)
-   };
-
-   assert(src_type.width == 32);
-
-   lp_build_context_init(&f32_bld, gallivm, f32_type);
-
-   /*
-    * using polynomial: (src * (src * (src * 0.3012 + 0.6935) + 0.0030) + 0.0023)
-    * ( poly =  0.3012*x^3 + 0.6935*x^2 + 0.0030*x + 0.0023)
-    * (found with octave polyfit and some magic as I couldn't get the error
-    * function right). Using the above mentioned error function, the values stay
-    * within +-0.35, except for the lowest values - hence tweaking linear segment
-    * to cover the first 16 instead of the first 11 values (the error stays
-    * just about acceptable there too).
-    * Hence: lin = src > 15 ? poly : src / 12.6
-    * This function really only makes sense for vectors, should use LUT otherwise.
-    * All in all (including float conversion) 11 instructions (with sse4.1),
-    * 6 constants (polynomial could be done with 1 instruction less at the cost
-    * of slightly worse dependency chain, fma should also help).
-    */
-   /* doing the 1/255 mul as part of the approximation */
-   srcf = lp_build_int_to_float(&f32_bld, src);
-   lin_const = lp_build_const_vec(gallivm, f32_type, 1.0f / (12.6f * 255.0f));
-   part_lin = lp_build_mul(&f32_bld, srcf, lin_const);
-
-   part_pow = lp_build_polynomial(&f32_bld, srcf, coeffs, 4);
-
-   lin_thresh = lp_build_const_vec(gallivm, f32_type, 15.0f);
-   is_linear = lp_build_compare(gallivm, f32_type, PIPE_FUNC_LEQUAL, srcf, lin_thresh);
-   return lp_build_select(&f32_bld, is_linear, part_lin, part_pow);
-}
-
-
-/**
- * Convert linear float values to srgb int values.
- * Several possibilities how to do this, e.g.
- * - use table (based on exponent/highest order mantissa bits) and do
- *   linear interpolation (https://gist.github.com/rygorous/2203834)
- * - Chebyshev polynomial
- * - Approximation using reciprocals
- * - using int-to-float and float-to-int tricks for pow()
- *   (http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent)
- *
- * @param src   float (vector) value(s) to convert.
- */
-static LLVMValueRef
-lp_build_linear_to_srgb(struct gallivm_state *gallivm,
-                        struct lp_type src_type,
-                        LLVMValueRef src)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-   struct lp_build_context f32_bld;
-   LLVMValueRef lin_thresh, lin, lin_const, is_linear, tmp, pow_final;
-
-   lp_build_context_init(&f32_bld, gallivm, src_type);
-
-   src = lp_build_clamp(&f32_bld, src, f32_bld.zero, f32_bld.one);
-
-   if (0) {
-      /*
-       * using int-to-float and float-to-int trick for pow().
-       * This is much more accurate than necessary thanks to the correction,
-       * but it most certainly makes no sense without rsqrt available.
-       * Bonus points if you understand how this works...
-       * All in all (including min/max clamp, conversion) 19 instructions.
-       */
-
-      float exp_f = 2.0f / 3.0f;
-      /* some compilers can't do exp2f, so this is exp2f(127.0f/exp_f - 127.0f) */
-      float exp2f_c = 1.30438178253e+19f;
-      float coeff_f = 0.62996f;
-      LLVMValueRef pow_approx, coeff, x2, exponent, pow_1, pow_2;
-      struct lp_type int_type = lp_int_type(src_type);
-
-      /*
-       * First calculate approx x^8/12
-       */
-      exponent = lp_build_const_vec(gallivm, src_type, exp_f);
-      coeff = lp_build_const_vec(gallivm, src_type,
-                                 exp2f_c * powf(coeff_f, 1.0f / exp_f));
-
-      /* premultiply src */
-      tmp = lp_build_mul(&f32_bld, coeff, src);
-      /* "log2" */
-      tmp = LLVMBuildBitCast(builder, tmp, lp_build_vec_type(gallivm, int_type), "");
-      tmp = lp_build_int_to_float(&f32_bld, tmp);
-      /* multiply for pow */
-      tmp = lp_build_mul(&f32_bld, tmp, exponent);
-      /* "exp2" */
-      pow_approx = lp_build_itrunc(&f32_bld, tmp);
-      pow_approx = LLVMBuildBitCast(builder, pow_approx,
-                                    lp_build_vec_type(gallivm, src_type), "");
-
-      /*
-       * Since that pow was inaccurate (like 3 bits, though each sqrt step would
-       * give another bit), compensate the error (which is why we chose another
-       * exponent in the first place).
-       */
-      /* x * x^(8/12) = x^(20/12) */
-      pow_1 = lp_build_mul(&f32_bld, pow_approx, src);
-
-      /* x * x * x^(-4/12) = x^(20/12) */
-      /* Should avoid using rsqrt if it's not available, but
-       * using x * x^(4/12) * x^(4/12) instead will change error weight */
-      tmp = lp_build_fast_rsqrt(&f32_bld, pow_approx);
-      x2 = lp_build_mul(&f32_bld, src, src);
-      pow_2 = lp_build_mul(&f32_bld, x2, tmp);
-
-      /* average the values so the errors cancel out, compensate bias,
-       * we also squeeze the 1.055 mul of the srgb conversion plus the 255.0 mul
-       * for conversion to int in here */
-      tmp = lp_build_add(&f32_bld, pow_1, pow_2);
-      coeff = lp_build_const_vec(gallivm, src_type,
-                                 1.0f / (3.0f * coeff_f) * 0.999852f *
-                                 powf(1.055f * 255.0f, 4.0f));
-      pow_final = lp_build_mul(&f32_bld, tmp, coeff);
-
-      /* x^(5/12) = rsqrt(rsqrt(x^20/12)) */
-      if (lp_build_fast_rsqrt_available(src_type)) {
-         pow_final = lp_build_fast_rsqrt(&f32_bld,
-                        lp_build_fast_rsqrt(&f32_bld, pow_final));
-      }
-      else {
-         pow_final = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, pow_final));
-      }
-      pow_final = lp_build_add(&f32_bld, pow_final,
-                               lp_build_const_vec(gallivm, src_type, -0.055f * 255.0f));
-   }
-
-   else {
-      /*
-       * using "rational polynomial" approximation here.
-       * Essentially y = a*x^0.375 + b*x^0.5 + c, with also
-       * factoring in the 255.0 mul and the scaling mul.
-       * (a is closer to actual value so has higher weight than b.)
-       * Note: the constants are magic values. They were found empirically,
-       * possibly could be improved but good enough (be VERY careful with
-       * error metric if you'd want to tweak them, they also MUST fit with
-       * the crappy polynomial above for srgb->linear since it is required
-       * that each srgb value maps back to the same value).
-       * This function has an error of max +-0.17 (and we'd only require +-0.6),
-       * for the approximated srgb->linear values the error is naturally larger
-       * (+-0.42) but still accurate enough (required +-0.5 essentially).
-       * All in all (including min/max clamp, conversion) 15 instructions.
-       * FMA would help (minus 2 instructions).
-       */
-
-      LLVMValueRef x05, x0375, a_const, b_const, c_const, tmp2;
-
-      if (lp_build_fast_rsqrt_available(src_type)) {
-         tmp = lp_build_fast_rsqrt(&f32_bld, src);
-         x05 = lp_build_mul(&f32_bld, src, tmp);
-      }
-      else {
-         /*
-          * I don't really expect this to be practical without rsqrt
-          * but there's no reason for triple punishment so at least
-          * save the otherwise resulting division and unnecessary mul...
-          */
-         x05 = lp_build_sqrt(&f32_bld, src);
-      }
-
-      tmp = lp_build_mul(&f32_bld, x05, src);
-      if (lp_build_fast_rsqrt_available(src_type)) {
-         x0375 = lp_build_fast_rsqrt(&f32_bld, lp_build_fast_rsqrt(&f32_bld, tmp));
-      }
-      else {
-         x0375 = lp_build_sqrt(&f32_bld, lp_build_sqrt(&f32_bld, tmp));
-      }
-
-      a_const = lp_build_const_vec(gallivm, src_type, 0.675f * 1.0622 * 255.0f);
-      b_const = lp_build_const_vec(gallivm, src_type, 0.325f * 1.0622 * 255.0f);
-      c_const = lp_build_const_vec(gallivm, src_type, -0.0620f * 255.0f);
-
-      tmp = lp_build_mul(&f32_bld, a_const, x0375);
-      tmp2 = lp_build_mul(&f32_bld, b_const, x05);
-      tmp2 = lp_build_add(&f32_bld, tmp2, c_const);
-      pow_final = lp_build_add(&f32_bld, tmp, tmp2);
-   }
-
-   /* linear part is easy */
-   lin_const = lp_build_const_vec(gallivm, src_type, 12.92f * 255.0f);
-   lin = lp_build_mul(&f32_bld, src, lin_const);
-
-   lin_thresh = lp_build_const_vec(gallivm, src_type, 0.0031308f);
-   is_linear = lp_build_compare(gallivm, src_type, PIPE_FUNC_LEQUAL, src, lin_thresh);
-   tmp = lp_build_select(&f32_bld, is_linear, lin, pow_final);
-
-   f32_bld.type.sign = 0;
-   return lp_build_iround(&f32_bld, tmp);
-}
-
-
-/**
- * Convert linear float soa values to packed srgb AoS values.
- * This only handles packed formats which are 4x8bit in size
- * (rgba and rgbx plus swizzles).
- *
- * @param src   float SoA (vector) values to convert.
- */
-LLVMValueRef
-lp_build_float_to_srgb_packed(struct gallivm_state *gallivm,
-                              const struct util_format_description *dst_fmt,
-                              struct lp_type src_type,
-                              LLVMValueRef *src)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-   unsigned chan;
-   struct lp_build_context f32_bld;
-   struct lp_type int32_type = lp_int_type(src_type);
-   LLVMValueRef tmpsrgb[4], alpha, dst;
-
-   lp_build_context_init(&f32_bld, gallivm, src_type);
-
-   /* rgb is subject to linear->srgb conversion, alpha is not */
-   for (chan = 0; chan < 3; chan++) {
-      tmpsrgb[chan] = lp_build_linear_to_srgb(gallivm, src_type, src[chan]);
-   }
-   /*
-    * can't use lp_build_conv since we want to keep values as 32bit
-    * here so we can interleave with rgb to go from SoA->AoS.
-    */
-   alpha = lp_build_clamp(&f32_bld, src[3], f32_bld.zero, f32_bld.one);
-   alpha = lp_build_mul(&f32_bld, alpha,
-                        lp_build_const_vec(gallivm, src_type, 255.0f));
-   tmpsrgb[3] = lp_build_iround(&f32_bld, alpha);
-
-   dst = lp_build_zero(gallivm, int32_type);
-   for (chan = 0; chan < dst_fmt->nr_channels; chan++) {
-      if (dst_fmt->swizzle[chan] <= UTIL_FORMAT_SWIZZLE_W) {
-         unsigned ls;
-         LLVMValueRef shifted, shift_val;
-         ls = dst_fmt->channel[dst_fmt->swizzle[chan]].shift;
-         shift_val = lp_build_const_int_vec(gallivm, int32_type, ls);
-         shifted = LLVMBuildShl(builder, tmpsrgb[chan], shift_val, "");
-         dst = LLVMBuildOr(builder, dst, shifted, "");
-      }
-   }
-   return dst;
-}
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -497,7 +497,7 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
   assert(format_desc->block.width == 2);
   assert(format_desc->block.height == 1);

-   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset, FALSE);
+   packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset);

   (void)j;

--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c
@@ -78,8 +78,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned dst_width,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
-                     unsigned i,
-                     boolean vector_justify)
+                     unsigned i)
 {
   LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
@@ -98,12 +97,10 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
      res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
   } else if (src_width < dst_width) {
      res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
-      if (vector_justify) {
 #ifdef PIPE_ARCH_BIG_ENDIAN
-         res = LLVMBuildShl(gallivm->builder, res,
-                            LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
+      res = LLVMBuildShl(gallivm->builder, res,
+                         LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
 #endif
-      }
   }

   return res;
@@ -115,20 +112,11 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
 * Use for fetching texels from a texture.
 * For SSE, typical values are length=4, src_width=32, dst_width=32.
 *
- * When src_width < dst_width, the return value can be justified in
- * one of two ways:
- * "integer justification" is used when the caller treats the destination
- * as a packed integer bitmask, as described by the channels' "shift" and
- * "width" fields;
- * "vector justification" is used when the caller casts the destination
- * to a vector and needs channel X to be in vector element 0.
- *
 * @param length length of the offsets
 * @param src_width src element width in bits
 * @param dst_width result element width in bits (src will be expanded to fit)
 * @param base_ptr base pointer, should be a i8 pointer type.
 * @param offsets vector with offsets
- * @param vector_justify select vector rather than integer justification
 */
 LLVMValueRef
 lp_build_gather(struct gallivm_state *gallivm,
@@ -136,8 +124,7 @@ lp_build_gather(struct gallivm_state *gallivm,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
-                LLVMValueRef offsets,
-                boolean vector_justify)
+                LLVMValueRef offsets)
 {
   LLVMValueRef res;

@@ -145,7 +132,7 @@ lp_build_gather(struct gallivm_state *gallivm,
      /* Scalar */
      return lp_build_gather_elem(gallivm, length,
                                  src_width, dst_width,
-                                  base_ptr, offsets, 0, vector_justify);
+                                  base_ptr, offsets, 0);
   } else {
      /* Vector */

@@ -159,7 +146,7 @@ lp_build_gather(struct gallivm_state *gallivm,
         LLVMValueRef elem;
         elem = lp_build_gather_elem(gallivm, length,
                                     src_width, dst_width,
-                                     base_ptr, offsets, i, vector_justify);
+                                     base_ptr, offsets, i);
         res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
      }
   }
--- a/src/gallium/auxiliary/gallivm/lp_bld_gather.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.h
@@ -47,8 +47,7 @@ lp_build_gather_elem(struct gallivm_state *gallivm,
                     unsigned dst_width,
                     LLVMValueRef base_ptr,
                     LLVMValueRef offsets,
-                     unsigned i,
-                     boolean vector_justify);
+                     unsigned i);

 LLVMValueRef
 lp_build_gather(struct gallivm_state *gallivm,
@@ -56,8 +55,7 @@ lp_build_gather(struct gallivm_state *gallivm,
                unsigned src_width,
                unsigned dst_width,
                LLVMValueRef base_ptr,
-                LLVMValueRef offsets,
-                boolean vector_justify);
+                LLVMValueRef offsets);

 LLVMValueRef
 lp_build_gather_values(struct gallivm_state * gallivm,
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -49,7 +49,7 @@
 *   - MC-JIT supports limited OSes (MacOSX and Linux)
 * - standard JIT in LLVM 3.1, with backports
 */
-#if defined(PIPE_ARCH_PPC_64) || defined(PIPE_ARCH_S390) || defined(PIPE_ARCH_ARM) || defined(PIPE_ARCH_AARCH64)
+#if defined(PIPE_ARCH_PPC_64) || defined(PIPE_ARCH_S390)
 #  define USE_MCJIT 1
 #  define HAVE_AVX 0
 #elif HAVE_LLVM >= 0x0302 || (HAVE_LLVM == 0x0301 && defined(HAVE_JIT_AVX_SUPPORT))
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -215,7 +215,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2];
   LLVMBuilderRef builder = bld->gallivm->builder;
@@ -235,8 +235,6 @@ lp_build_rho(struct lp_build_sample_context *bld,

   /* Note that all simplified calculations will only work for isotropic filtering */

-   assert(bld->num_lods != length);
-
   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                 bld->gallivm, texture_unit);
   first_level_vec = lp_build_broadcast_scalar(int_size_bld, first_level);
@@ -250,14 +248,14 @@ lp_build_rho(struct lp_build_sample_context *bld,
       * Cube map code did already everything except size mul and per-quad extraction.
       */
      rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                      levelf_bld->type, cube_rho, 0);
+                                      perquadf_bld->type, cube_rho, 0);
      if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) {
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(perquadf_bld, rho);
      }
      /* Could optimize this for single quad just skip the broadcast */
      cubesize = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
-                                            levelf_bld->type, float_size, index0);
-      rho = lp_build_mul(levelf_bld, cubesize, rho);
+                                            perquadf_bld->type, float_size, index0);
+      rho = lp_build_mul(perquadf_bld, cubesize, rho);
   }
   else if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
      LLVMValueRef ddmax[3], ddx[3], ddy[3];
@@ -291,12 +289,12 @@ lp_build_rho(struct lp_build_sample_context *bld,
         }
         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         levelf_bld->type, rho_vec, 0);
+                                         perquadf_bld->type, rho_vec, 0);
         /*
          * note that as long as we don't care about per-pixel lod could reduce math
          * more (at some shuffle cost), but for now only do sqrt after packing.
          */
-         rho = lp_build_sqrt(levelf_bld, rho);
+         rho = lp_build_sqrt(perquadf_bld, rho);
      }
      else {
         rho_vec = ddmax[0];
@@ -311,7 +309,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
          * since we can't handle per-pixel rho/lod from now on (TODO).
          */
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         levelf_bld->type, rho_vec, 0);
+                                         perquadf_bld->type, rho_vec, 0);
      }
   }
   else {
@@ -383,8 +381,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                         levelf_bld->type, rho_vec, 0);
-         rho = lp_build_sqrt(levelf_bld, rho);
+                                         perquadf_bld->type, rho_vec, 0);
+         rho = lp_build_sqrt(perquadf_bld, rho);
      }
      else {
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
@@ -464,7 +462,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
               }
            }
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
-                                            levelf_bld->type, rho, 0);
+                                            perquadf_bld->type, rho, 0);
         }
         else {
            if (dims <= 1) {
@@ -654,11 +652,11 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,

 {
   LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
   LLVMValueRef lod;

-   *out_lod_ipart = bld->leveli_bld.zero;
-   *out_lod_fpart = levelf_bld->zero;
+   *out_lod_ipart = bld->perquadi_bld.zero;
+   *out_lod_fpart = perquadf_bld->zero;

   if (bld->static_sampler_state->min_max_lod_equal) {
      /* User is forcing sampling from a particular mipmap level.
@@ -668,15 +666,12 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
         bld->dynamic_state->min_lod(bld->dynamic_state,
                                     bld->gallivm, sampler_unit);

-      lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+      lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);
   }
   else {
      if (explicit_lod) {
-         if (bld->num_lods != bld->coord_type.length)
-            lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                                            levelf_bld->type, explicit_lod, 0);
-         else
-            lod = explicit_lod;
+         lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
+                                         perquadf_bld->type, explicit_lod, 0);
      }
      else {
         LLVMValueRef rho;
@@ -699,29 +694,29 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,

            if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
                mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
-               *out_lod_ipart = lp_build_ilog2(levelf_bld, rho);
-               *out_lod_fpart = levelf_bld->zero;
+               *out_lod_ipart = lp_build_ilog2(perquadf_bld, rho);
+               *out_lod_fpart = perquadf_bld->zero;
               return;
            }
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
                !(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-               lp_build_brilinear_rho(levelf_bld, rho, BRILINEAR_FACTOR,
+               lp_build_brilinear_rho(perquadf_bld, rho, BRILINEAR_FACTOR,
                                      out_lod_ipart, out_lod_fpart);
               return;
            }
         }

         if (0) {
-            lod = lp_build_log2(levelf_bld, rho);
+            lod = lp_build_log2(perquadf_bld, rho);
         }
         else {
-            lod = lp_build_fast_log2(levelf_bld, rho);
+            lod = lp_build_fast_log2(perquadf_bld, rho);
         }

         /* add shader lod bias */
         if (lod_bias) {
            lod_bias = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
-                  levelf_bld->type, lod_bias, 0);
+                  perquadf_bld->type, lod_bias, 0);
            lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
         }
      }
@@ -731,7 +726,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
         LLVMValueRef sampler_lod_bias =
            bld->dynamic_state->lod_bias(bld->dynamic_state,
                                         bld->gallivm, sampler_unit);
-         sampler_lod_bias = lp_build_broadcast_scalar(levelf_bld,
+         sampler_lod_bias = lp_build_broadcast_scalar(perquadf_bld,
                                                      sampler_lod_bias);
         lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
      }
@@ -741,33 +736,33 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
         LLVMValueRef max_lod =
            bld->dynamic_state->max_lod(bld->dynamic_state,
                                        bld->gallivm, sampler_unit);
-         max_lod = lp_build_broadcast_scalar(levelf_bld, max_lod);
+         max_lod = lp_build_broadcast_scalar(perquadf_bld, max_lod);

-         lod = lp_build_min(levelf_bld, lod, max_lod);
+         lod = lp_build_min(perquadf_bld, lod, max_lod);
      }
      if (bld->static_sampler_state->apply_min_lod) {
         LLVMValueRef min_lod =
            bld->dynamic_state->min_lod(bld->dynamic_state,
                                        bld->gallivm, sampler_unit);
-         min_lod = lp_build_broadcast_scalar(levelf_bld, min_lod);
+         min_lod = lp_build_broadcast_scalar(perquadf_bld, min_lod);

-         lod = lp_build_max(levelf_bld, lod, min_lod);
+         lod = lp_build_max(perquadf_bld, lod, min_lod);
      }
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      if (!(gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR)) {
-         lp_build_brilinear_lod(levelf_bld, lod, BRILINEAR_FACTOR,
+         lp_build_brilinear_lod(perquadf_bld, lod, BRILINEAR_FACTOR,
                                out_lod_ipart, out_lod_fpart);
      }
      else {
-         lp_build_ifloor_fract(levelf_bld, lod, out_lod_ipart, out_lod_fpart);
+         lp_build_ifloor_fract(perquadf_bld, lod, out_lod_ipart, out_lod_fpart);
      }

      lp_build_name(*out_lod_fpart, "lod_fpart");
   }
   else {
-      *out_lod_ipart = lp_build_iround(levelf_bld, lod);
+      *out_lod_ipart = lp_build_iround(perquadf_bld, lod);
   }

   lp_build_name(*out_lod_ipart, "lod_ipart");
@@ -789,20 +784,20 @@ lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
                           LLVMValueRef lod_ipart,
                           LLVMValueRef *level_out)
 {
-   struct lp_build_context *leveli_bld = &bld->leveli_bld;
+   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
   LLVMValueRef first_level, last_level, level;

   first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                 bld->gallivm, texture_unit);
   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                               bld->gallivm, texture_unit);
-   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
-   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
+   first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
+   last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);

-   level = lp_build_add(leveli_bld, lod_ipart, first_level);
+   level = lp_build_add(perquadi_bld, lod_ipart, first_level);

   /* clamp level to legal range of levels */
-   *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
+   *level_out = lp_build_clamp(perquadi_bld, level, first_level, last_level);
 }


@@ -820,8 +815,8 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                           LLVMValueRef *level1_out)
 {
   LLVMBuilderRef builder = bld->gallivm->builder;
-   struct lp_build_context *leveli_bld = &bld->leveli_bld;
-   struct lp_build_context *levelf_bld = &bld->levelf_bld;
+   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
+   struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
   LLVMValueRef first_level, last_level;
   LLVMValueRef clamp_min;
   LLVMValueRef clamp_max;
@@ -830,11 +825,11 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                                 bld->gallivm, texture_unit);
   last_level = bld->dynamic_state->last_level(bld->dynamic_state,
                                               bld->gallivm, texture_unit);
-   first_level = lp_build_broadcast_scalar(leveli_bld, first_level);
-   last_level = lp_build_broadcast_scalar(leveli_bld, last_level);
+   first_level = lp_build_broadcast_scalar(perquadi_bld, first_level);
+   last_level = lp_build_broadcast_scalar(perquadi_bld, last_level);

-   *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
-   *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
+   *level0_out = lp_build_add(perquadi_bld, lod_ipart, first_level);
+   *level1_out = lp_build_add(perquadi_bld, *level0_out, perquadi_bld->one);

   /*
    * Clamp both *level0_out and *level1_out to [first_level, last_level], with
@@ -848,7 +843,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
    * converting to our lp_bld_logic helpers.
    */
 #if HAVE_LLVM < 0x0301
-   assert(leveli_bld->type.length == 1);
+   assert(perquadi_bld->type.length == 1);
 #endif

   /* *level0_out < first_level */
@@ -863,7 +858,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                 first_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
-                                      levelf_bld->zero, *lod_fpart_inout, "");
+                                      perquadf_bld->zero, *lod_fpart_inout, "");

   /* *level0_out >= last_level */
   clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
@@ -877,7 +872,7 @@ lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
                                 last_level, *level1_out, "");

   *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
-                                      levelf_bld->zero, *lod_fpart_inout, "");
+                                      perquadf_bld->zero, *lod_fpart_inout, "");

   lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
   lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
@@ -1092,7 +1087,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            ileveli = lp_build_extract_broadcast(bld->gallivm,
-                                                 bld->leveli_bld.type,
+                                                 bld->perquadi_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
@@ -1121,7 +1116,7 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
         */
         assert(bld->num_lods == bld->coord_bld.type.length);
         if (bld->dims == 1) {
-            assert(bld->int_size_in_bld.type.length == 1);
+            assert(bld->int_size_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            /* vector shift with variable shift count alert... */
@@ -1136,9 +1131,10 @@ lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i], ilevel1);
            }
-            *out_size = lp_build_concat(bld->gallivm, tmp,
-                                        bld->int_size_in_bld.type,
-                                        bld->num_lods);
+            int_size_vec = lp_build_concat(bld->gallivm,
+                                           tmp,
+                                           bld->int_size_in_bld.type,
+                                           bld->num_lods);
         }
      }
   }
@@ -1222,10 +1218,10 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
         *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                coord_type, size, 0);
         if (dims >= 2) {
-            *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
-                                                    coord_type, size, 1);
+            *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+                                                   coord_type, size, 1);
            if (dims == 3) {
-               *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
+               *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
                                                      coord_type, size, 2);
            }
         }
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -268,13 +268,13 @@ struct lp_build_sample_context
   struct lp_type texel_type;
   struct lp_build_context texel_bld;

-   /** Float level type */
-   struct lp_type levelf_type;
-   struct lp_build_context levelf_bld;
+   /** Float per-quad type */
+   struct lp_type perquadf_type;
+   struct lp_build_context perquadf_bld;

-   /** Int level type */
-   struct lp_type leveli_type;
-   struct lp_build_context leveli_bld;
+   /** Int per-quad type */
+   struct lp_type perquadi_type;
+   struct lp_build_context perquadi_bld;

   /* Common dynamic state values */
   LLVMValueRef row_stride_array;
@@ -477,7 +477,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                    const struct lp_derivatives *derivs,
                    LLVMValueRef lod_bias,
                    LLVMValueRef explicit_lod,
-                    boolean scalar_lod,
                    LLVMValueRef texel_out[4]);


--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -531,7 +531,7 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                              bld->texel_type.length,
                              bld->format_desc->block.bits,
                              bld->texel_type.width,
-                              data_ptr, offset, TRUE);
+                              data_ptr, offset);

      rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
   }
@@ -893,7 +893,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
-                                       data_ptr, offset[k][j][i], TRUE);
+                                       data_ptr, offset[k][j][i]);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
@@ -1422,8 +1422,8 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
-                                                     bld->levelf_bld.type, 256.0);
-      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
+                                                     bld->perquadf_bld.type, 256.0);
+      LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
@@ -1433,9 +1433,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
-      if (bld->num_lods == 1) {
+      if (num_quads == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
-                                   lod_fpart, bld->leveli_bld.zero,
+                                   lod_fpart, bld->perquadi_bld.zero,
                                   "need_lerp");
      }
      else {
@@ -1450,9 +1450,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
-         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
-                                  bld->leveli_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
+         lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
+                                  bld->perquadi_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
      }

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
@@ -1462,6 +1462,9 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
+         lp_build_mipmap_level_sizes(bld, ilevel1,
+                                     &size1,
+                                     &row_stride1_vec, &img_stride1_vec);
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
@@ -1508,7 +1511,7 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,

         /* interpolate samples from the two mipmap levels */

-         if (num_quads == 1 && bld->num_lods == 1) {
+         if (num_quads == 1) {
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);

@@ -1523,16 +1526,17 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
 #endif
         }
         else {
-            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
-            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
+            const unsigned num_chans_per_quad = 4 * 4;
+            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->perquadi_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
+            assert(u8n_bld.type.length == num_quads * num_chans_per_quad);
            for (i = 0; i < u8n_bld.type.length; ++i) {
-               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
+               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_quad);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -979,17 +979,17 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
+      unsigned num_quads = bld->coord_bld.type.length / 4;

      /* need_lerp = lod_fpart > 0 */
-      if (bld->num_lods == 1) {
+      if (num_quads == 1) {
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
-                                   lod_fpart, bld->levelf_bld.zero,
+                                   lod_fpart, bld->perquadf_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
-          * We'll do mip filtering if any of the quads (or individual
-          * pixel in case of per-pixel lod) need it.
+          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
@@ -998,13 +998,13 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
-         lod_fpart = lp_build_max(&bld->levelf_bld, lod_fpart,
-                                  bld->levelf_bld.zero);
-         need_lerp = lp_build_compare(bld->gallivm, bld->levelf_bld.type,
+         lod_fpart = lp_build_max(&bld->perquadf_bld, lod_fpart,
+                                  bld->perquadf_bld.zero);
+         need_lerp = lp_build_compare(bld->gallivm, bld->perquadf_bld.type,
                                      PIPE_FUNC_GREATER,
-                                      lod_fpart, bld->levelf_bld.zero);
-         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, need_lerp);
-      }
+                                      lod_fpart, bld->perquadf_bld.zero);
+         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, need_lerp);
+     }

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
@@ -1036,11 +1036,10 @@ lp_build_sample_mipmap(struct lp_build_sample_context *bld,

         /* interpolate samples from the two mipmap levels */

-         if (bld->num_lods != bld->coord_type.length)
-            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
-                                                              bld->levelf_bld.type,
-                                                              bld->texel_bld.type,
-                                                              lod_fpart);
+         lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
+                                                           bld->perquadf_bld.type,
+                                                           bld->texel_bld.type,
+                                                           lod_fpart);

         for (chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
@@ -1144,7 +1143,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
                            mip_filter,
                            lod_ipart, lod_fpart);
   } else {
-      *lod_ipart = bld->leveli_bld.zero;
+      *lod_ipart = bld->perquadi_bld.zero;
   }

   /*
@@ -1167,7 +1166,7 @@ lp_build_sample_common(struct lp_build_sample_context *bld,
      else {
         first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                       bld->gallivm, texture_index);
-         first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
+         first_level = lp_build_broadcast_scalar(&bld->perquadi_bld, first_level);
         *ilevel0 = first_level;
      }
      break;
@@ -1296,7 +1295,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                     const LLVMValueRef *offsets,
                     LLVMValueRef *colors_out)
 {
-   struct lp_build_context *perquadi_bld = &bld->leveli_bld;
+   struct lp_build_context *perquadi_bld = &bld->perquadi_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   unsigned dims = bld->dims, chan;
   unsigned target = bld->static_texture_state->target;
@@ -1306,14 +1305,10 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
   LLVMValueRef width, height, depth, i, j;
   LLVMValueRef offset, out_of_bounds, out1;

+   /* XXX just like ordinary sampling, we don't handle per-pixel lod (yet). */
   if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
-      if (bld->num_lods != int_coord_bld->type.length) {
-         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
-                                            perquadi_bld->type, explicit_lod, 0);
-      }
-      else {
-         ilevel = explicit_lod;
-      }
+      ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
+                                         perquadi_bld->type, explicit_lod, 0);
      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel);
   }
   else {
@@ -1494,7 +1489,6 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                    const struct lp_derivatives *derivs, /* optional */
                    LLVMValueRef lod_bias, /* optional */
                    LLVMValueRef explicit_lod, /* optional */
-                    boolean scalar_lod,
                    LLVMValueRef texel_out[4])
 {
   unsigned dims = texture_dims(static_texture_state->target);
@@ -1535,6 +1529,10 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
   bld.float_size_in_type.length = dims > 1 ? 4 : 1;
   bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
   bld.texel_type = type;
+   bld.perquadf_type = type;
+   /* we want native vector size to be able to use our intrinsics */
+   bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
+   bld.perquadi_type = lp_int_type(bld.perquadf_type);

   /* always using the first channel hopefully should be safe,
    * if not things WILL break in other places anyway.
@@ -1565,51 +1563,21 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
      debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
   }

-   /*
-    * This is all a bit complicated different paths are chosen for performance
-    * reasons.
-    * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
-    * everything (the last two options are equivalent for 4-wide case).
-    * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
-    * lod is calculated then the lod value extracted afterwards so making this
-    * case basically the same as far as lod handling is concerned for the
-    * further sample/filter code as the 1 lod for everything case.
-    * Different lod handling mostly shows up when building mipmap sizes
-    * (lp_build_mipmap_level_sizes() and friends) and also in filtering
-    * (getting the fractional part of the lod to the right texels).
-    */
-
   /*
    * There are other situations where at least the multiple int lods could be
    * avoided like min and max lod being equal.
    */
-   if (explicit_lod && !scalar_lod &&
-       ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
-        (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
-      bld.num_lods = type.length;
-   /* TODO: for true scalar_lod should only use 1 lod value */
-   else if ((is_fetch && explicit_lod && bld.static_texture_state->target != PIPE_BUFFER ) ||
-            (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
+   if ((is_fetch && explicit_lod && bld.static_texture_state->target != PIPE_BUFFER) ||
+       (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
      bld.num_lods = num_quads;
   }
   else {
      bld.num_lods = 1;
   }

-   bld.levelf_type = type;
-   /* we want native vector size to be able to use our intrinsics */
-   if (bld.num_lods != type.length) {
-      bld.levelf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
-   }
-   bld.leveli_type = lp_int_type(bld.levelf_type);
   bld.float_size_type = bld.float_size_in_type;
-   /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
-    * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
-   if (bld.num_lods > 1) {
-      bld.float_size_type.length = bld.num_lods == type.length ?
-                                      bld.num_lods * bld.float_size_in_type.length :
-                                      type.length;
-   }
+   bld.float_size_type.length = bld.num_lods > 1 ? type.length :
+                                   bld.float_size_in_type.length;
   bld.int_size_type = lp_int_type(bld.float_size_type);

   lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
@@ -1622,8 +1590,8 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
   lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
   lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
   lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
-   lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
-   lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
+   lp_build_context_init(&bld.perquadf_bld, gallivm, bld.perquadf_type);
+   lp_build_context_init(&bld.perquadi_bld, gallivm, bld.perquadi_type);

   /* Get the dynamic state */
   tex_width = dynamic_state->width(dynamic_state, gallivm, texture_index);
@@ -1767,31 +1735,14 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
         bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
         bld4.texel_type = bld.texel_type;
         bld4.texel_type.length = 4;
-         bld4.levelf_type = type4;
+         bld4.perquadf_type = type4;
         /* we want native vector size to be able to use our intrinsics */
-         bld4.levelf_type.length = 1;
-         bld4.leveli_type = lp_int_type(bld4.levelf_type);
+         bld4.perquadf_type.length = 1;
+         bld4.perquadi_type = lp_int_type(bld4.perquadf_type);

-         if (explicit_lod && !scalar_lod &&
-             ((is_fetch && bld.static_texture_state->target != PIPE_BUFFER) ||
-              (!is_fetch && mip_filter != PIPE_TEX_MIPFILTER_NONE)))
-            bld4.num_lods = type4.length;
-         else
-            bld4.num_lods = 1;
-
-         bld4.levelf_type = type4;
-         /* we want native vector size to be able to use our intrinsics */
-         if (bld4.num_lods != type4.length) {
-            bld4.levelf_type.length = 1;
-         }
-         bld4.leveli_type = lp_int_type(bld4.levelf_type);
+         bld4.num_lods = 1;
+         bld4.int_size_type = bld4.int_size_in_type;
         bld4.float_size_type = bld4.float_size_in_type;
-         if (bld4.num_lods > 1) {
-            bld4.float_size_type.length = bld4.num_lods == type4.length ?
-                                            bld4.num_lods * bld4.float_size_in_type.length :
-                                            type4.length;
-         }
-         bld4.int_size_type = lp_int_type(bld4.float_size_type);

         lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
         lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
@@ -1803,15 +1754,15 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
         lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
         lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
         lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
-         lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
-         lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
+         lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type);
+         lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type);

         for (i = 0; i < num_quads; i++) {
            LLVMValueRef s4, t4, r4;
-            LLVMValueRef lod_ipart4, lod_fpart4 = NULL;
-            LLVMValueRef ilevel04, ilevel14 = NULL;
+            LLVMValueRef lod_iparts, lod_fparts = NULL;
+            LLVMValueRef ilevel0s, ilevel1s = NULL;
+            LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
            LLVMValueRef offsets4[4] = { NULL };
-            unsigned num_lods = bld4.num_lods;

            s4 = lp_build_extract_range(gallivm, s, 4*i, 4);
            t4 = lp_build_extract_range(gallivm, t, 4*i, 4);
@@ -1826,27 +1777,27 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
                  }
               }
            }
-            lod_ipart4 = lp_build_extract_range(gallivm, lod_ipart, num_lods * i, num_lods);
-            ilevel04 = lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
+            lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, "");
+            ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, "");
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-               ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
-               lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
+               ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, "");
+               lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, "");
            }

            if (use_aos) {
               /* do sampling/filtering with fixed pt arithmetic */
               lp_build_sample_aos(&bld4, sampler_index,
                                   s4, t4, r4, offsets4,
-                                   lod_ipart4, lod_fpart4,
-                                   ilevel04, ilevel14,
+                                   lod_iparts, lod_fparts,
+                                   ilevel0s, ilevel1s,
                                   texelout4);
            }

            else {
               lp_build_sample_general(&bld4, sampler_index,
                                       s4, t4, r4, offsets4,
-                                       lod_ipart4, lod_fpart4,
-                                       ilevel04, ilevel14,
+                                       lod_iparts, lod_fparts,
+                                       ilevel0s, ilevel1s,
                                       texelout4);
            }
            for (j = 0; j < 4; j++) {
@@ -1913,7 +1864,6 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
   lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128));

   if (explicit_lod) {
-      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), "");
      first_level = dynamic_state->first_level(dynamic_state, gallivm, texture_unit);
      lod = lp_build_broadcast_scalar(&bld_int_vec,
--- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
@@ -217,20 +217,6 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");

-      /*
-       * Vector element 0 is always channel X.
-       *
-       *                        76 54 32 10 (array numbering)
-       * Little endian reg in:  YX YX YX YX
-       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
-       *                        XX XX XX XX if shift left (shift == 1)
-       *
-       *                        01 23 45 67 (array numbering)
-       * Big endian reg in:     XY XY XY XY
-       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
-       *                        XX XX XX XX if shift right (shift == -1)
-       *
-       */
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
      shift = channel == 0 ? 1 : -1;
 #else
@@ -254,23 +240,10 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
      /*
       * Bit mask and recursive shifts
       *
-       * Little-endian registers:
-       *
-       *   7654 3210
-       *   WZYX WZYX .... WZYX  <= input
-       *   00Y0 00Y0 .... 00Y0  <= mask
-       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
-       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
-       *
-       * Big-endian registers:
-       *
-       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
-       *   0Y00 0Y00 .... 0Y00  <= mask
-       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
-       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
-       *
-       * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
+       *   0Y00 0Y00 .... 0Y00
+       *   YY00 YY00 .... YY00
+       *   YYYY YYYY .... YYYY  <= output
       */
      struct lp_type type4;
      const int shifts[4][2] = {
@@ -301,15 +274,14 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

-         /* See endianness diagram above */
-#ifdef PIPE_ARCH_BIG_ENDIAN
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
         shift = -shift;
 #endif

         if(shift > 0)
-            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
+            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
         if(shift < 0)
-            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
+            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");

         assert(tmp);
         if(tmp)
@@ -502,39 +474,21 @@ lp_build_swizzle_aos(struct lp_build_context *bld,

      /*
       * Mask and shift the channels, trying to group as many channels in the
-       * same shift as possible.  The shift amount is positive for shifts left
-       * and negative for shifts right.
+       * same shift as possible
       */
      for (shift = -3; shift <= 3; ++shift) {
         uint64_t mask = 0;

         assert(type4.width <= sizeof(mask)*8);

-         /*
-          * Vector element numbers follow the XYZW order, so 0 is always X, etc.
-          * After widening 4 times we have:
-          *
-          *                                3210
-          * Little-endian register layout: WZYX
-          *
-          *                                0123
-          * Big-endian register layout:    XYZW
-          *
-          * For little-endian, higher-numbered channels are obtained by a shift right
-          * (negative shift amount) and lower-numbered channels by a shift left
-          * (positive shift amount).  The opposite is true for big-endian.
-          */
         for (chan = 0; chan < 4; ++chan) {
-            if (swizzles[chan] < 4) {
-               /* We need to move channel swizzles[chan] into channel chan */
+            /* FIXME: big endian */
+            if (swizzles[chan] < 4 &&
+                chan - swizzles[chan] == shift) {
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
-               if (swizzles[chan] - chan == -shift) {
-                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
-               }
+               mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
 #else
-               if (swizzles[chan] - chan == shift) {
-                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
-               }
+               mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
 #endif
            }
         }
@@ -548,11 +502,21 @@ lp_build_swizzle_aos(struct lp_build_context *bld,
            masked = LLVMBuildAnd(builder, a,
                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
            if (shift > 0) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
               shifted = LLVMBuildShl(builder, masked,
                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
+#else
+               shifted = LLVMBuildLShr(builder, masked,
+                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
+#endif
            } else if (shift < 0) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
               shifted = LLVMBuildLShr(builder, masked,
                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
+#else
+               shifted = LLVMBuildShl(builder, masked,
+                                      lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
+#endif
            } else {
               shifted = masked;
            }
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -390,8 +390,11 @@ lp_build_emit_fetch_texoffset(
   if (chan_index == LP_CHAN_ALL) {
      swizzle = ~0;
   } else {
-      assert(chan_index < TGSI_SWIZZLE_W);
      swizzle = tgsi_util_get_src_register_swizzle(&reg.Register, chan_index);
+      if (swizzle > 2) {
+         assert(0 && "invalid swizzle in emit_fetch_texoffset()");
+         return bld_base->base.undef;
+      }
   }

   assert(off->Index <= bld_base->info->file_max[off->File]);
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -184,7 +184,6 @@ struct lp_build_sampler_soa
                        const struct lp_derivatives *derivs,
                        LLVMValueRef lod_bias, /* optional */
                        LLVMValueRef explicit_lod, /* optional */
-                        boolean scalar_lod,
                        LLVMValueRef *texel);

   void
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -396,7 +396,7 @@ frc_emit(
                                       TGSI_OPCODE_SUB, emit_data->args[0], tmp);
 }

-/* TGSI_OPCODE_KILL_IF */
+/* TGSI_OPCODE_KIL */

 static void
 kil_fetch_args(
@@ -419,7 +419,7 @@ kil_fetch_args(
   emit_data->dst_type = LLVMVoidTypeInContext(bld_base->base.gallivm->context);
 }

-/* TGSI_OPCODE_KILL */
+/* TGSI_OPCODE_KILP */

 static void
 kilp_fetch_args(
@@ -633,6 +633,8 @@ rsq_emit(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
 {
+   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
+                                               emit_data->args[0]);
   if (bld_base->rsq_action.emit) {
      bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
   } else {
@@ -869,8 +871,8 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
   bld_base->op_actions[TGSI_OPCODE_EX2].fetch_args = scalar_unary_fetch_args;
   bld_base->op_actions[TGSI_OPCODE_IF].fetch_args = scalar_unary_fetch_args;
   bld_base->op_actions[TGSI_OPCODE_UIF].fetch_args = scalar_unary_fetch_args;
-   bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kil_fetch_args;
-   bld_base->op_actions[TGSI_OPCODE_KILL].fetch_args = kilp_fetch_args;
+   bld_base->op_actions[TGSI_OPCODE_KIL].fetch_args = kil_fetch_args;
+   bld_base->op_actions[TGSI_OPCODE_KILP].fetch_args = kilp_fetch_args;
   bld_base->op_actions[TGSI_OPCODE_RCP].fetch_args = scalar_unary_fetch_args;
   bld_base->op_actions[TGSI_OPCODE_SIN].fetch_args = scalar_unary_fetch_args;
   bld_base->op_actions[TGSI_OPCODE_LG2].fetch_args = scalar_unary_fetch_args;
@@ -1159,9 +1161,14 @@ iset_emit_cpu(
   struct lp_build_emit_data * emit_data,
   unsigned pipe_func)
 {
+   LLVMValueRef nz = lp_build_const_vec(bld_base->base.gallivm,
+					bld_base->int_bld.type, ~0U);
   LLVMValueRef cond = lp_build_cmp(&bld_base->int_bld, pipe_func,
                                    emit_data->args[0], emit_data->args[1]);
-   emit_data->output[emit_data->chan] = cond;
+   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->int_bld,
+                                          cond,
+                                          nz,
+                                          bld_base->int_bld.zero);
 }

 /* TGSI_OPCODE_IMAX (CPU Only) */
@@ -1347,6 +1354,9 @@ rcp_emit_cpu(
 }

 /* Reciprical squareroot (CPU Only) */
+
+/* This is not the same as TGSI_OPCODE_RSQ, which requires the argument to be
+ * greater than or equal to 0 */
 static void
 recip_sqrt_emit_cpu(
   const struct lp_build_tgsi_action * action,
@@ -1610,9 +1620,14 @@ uset_emit_cpu(
   struct lp_build_emit_data * emit_data,
   unsigned pipe_func)
 {
+   LLVMValueRef nz = lp_build_const_vec(bld_base->base.gallivm,
+					bld_base->uint_bld.type, ~0U);
   LLVMValueRef cond = lp_build_cmp(&bld_base->uint_bld, pipe_func,
                                    emit_data->args[0], emit_data->args[1]);
-   emit_data->output[emit_data->chan] = cond;
+   emit_data->output[emit_data->chan] = lp_build_select(&bld_base->uint_bld,
+                                          cond,
+					  nz,
+                                          bld_base->uint_bld.zero);
 }


--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -657,10 +657,12 @@ lp_emit_instruction_aos(
   case TGSI_OPCODE_DDY:
      return FALSE;

-   case TGSI_OPCODE_KILL:
+   case TGSI_OPCODE_KILP:
+      /* predicated kill */
      return FALSE;

-   case TGSI_OPCODE_KILL_IF:
+   case TGSI_OPCODE_KIL:
+      /* conditional kill */
      return FALSE;

   case TGSI_OPCODE_PK2H:
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -1026,9 +1026,9 @@ emit_fetch_immediate(
   }

   if (stype == TGSI_TYPE_UNSIGNED) {
-      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
+      res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
   } else if (stype == TGSI_TYPE_SIGNED) {
-      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+      res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
   }
   return res;
 }
@@ -1576,7 +1576,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
-   boolean scalar_lod;
   unsigned num_coords, num_derivs, num_offsets;
   unsigned i;

@@ -1694,9 +1693,6 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
      }
   }

-   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
@@ -1705,7 +1701,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
                                  coords,
                                  offsets,
                                  deriv_ptr,
-                                  lod_bias, explicit_lod, scalar_lod,
+                                  lod_bias, explicit_lod,
                                  texel);
 }

@@ -1723,7 +1719,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
-   boolean scalar_lod;
   unsigned num_coords, num_offsets, num_derivs;
   unsigned i;

@@ -1789,6 +1784,13 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
      return;
   }

+   /*
+    * unlike old-style tex opcodes the texture/sampler indices
+    * always come from src1 and src2 respectively.
+    */
+   texture_unit = inst->Src[1].Register.Index;
+   sampler_unit = inst->Src[2].Register.Index;
+
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
      explicit_lod = NULL;
@@ -1841,9 +1843,6 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
      }
   }

-   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
@@ -1852,7 +1851,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
                                  coords,
                                  offsets,
                                  deriv_ptr,
-                                  lod_bias, explicit_lod, scalar_lod,
+                                  lod_bias, explicit_lod,
                                  texel);
 }

@@ -1867,7 +1866,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef offsets[3] = { NULL };
-   boolean scalar_lod;
   unsigned num_coords;
   unsigned dims;
   unsigned i;
@@ -1936,9 +1934,6 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
      }
   }

-   /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
-   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
@@ -1947,7 +1942,7 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                                  coords,
                                  offsets,
                                  NULL,
-                                  NULL, explicit_lod, scalar_lod,
+                                  NULL, explicit_lod,
                                  texel);
 }

@@ -2043,7 +2038,7 @@ near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
 * Kill fragment if any of the src register values are negative.
 */
 static void
-emit_kill_if(
+emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
@@ -2096,12 +2091,13 @@ emit_kill_if(


 /**
- * Unconditional fragment kill.
+ * Predicated fragment kill.
+ * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
 static void
-emit_kill(struct lp_build_tgsi_soa_context *bld,
+emit_kilp(struct lp_build_tgsi_soa_context *bld,
          int pc)
 {
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
@@ -2319,25 +2315,25 @@ ddy_emit(
 }

 static void
-kill_emit(
+kilp_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
 {
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

-   emit_kill(bld, bld_base->pc - 1);
+   emit_kilp(bld, bld_base->pc - 1);
 }

 static void
-kill_if_emit(
+kil_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
 {
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

-   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
+   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
 }

 static void
@@ -3168,8 +3164,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
-   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
-   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
+   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -33,8 +33,6 @@
 * Set GALLIUM_HUD=help for more info.
 */

-#include <stdio.h>
-
 #include "hud/hud_context.h"
 #include "hud/hud_private.h"
 #include "hud/font.h"
@@ -108,8 +106,8 @@ hud_draw_colored_prims(struct hud_context *hud, unsigned prim,
   hud->constants.color[1] = g;
   hud->constants.color[2] = b;
   hud->constants.color[3] = a;
-   hud->constants.translate[0] = (float) xoffset;
-   hud->constants.translate[1] = (float) yoffset;
+   hud->constants.translate[0] = xoffset;
+   hud->constants.translate[1] = yoffset;
   hud->constants.scale[0] = 1;
   hud->constants.scale[1] = yscale;
   cso_set_constant_buffer(cso, PIPE_SHADER_VERTEX, 0, &hud->constbuf);
@@ -129,10 +127,10 @@ hud_draw_colored_quad(struct hud_context *hud, unsigned prim,
                      float r, float g, float b, float a)
 {
   float buffer[] = {
-      (float) x1, (float) y1,
-      (float) x1, (float) y2,
-      (float) x2, (float) y2,
-      (float) x2, (float) y1,
+      x1, y1,
+      x1, y2,
+      x2, y2,
+      x2, y1,
   };

   hud_draw_colored_prims(hud, prim, buffer, 4, r, g, b, a, 0, 0, 1);
@@ -147,17 +145,17 @@ hud_draw_background_quad(struct hud_context *hud,

   assert(hud->bg.num_vertices + 4 <= hud->bg.max_num_vertices);

-   vertices[num++] = (float) x1;
-   vertices[num++] = (float) y1;
+   vertices[num++] = x1;
+   vertices[num++] = y1;

-   vertices[num++] = (float) x1;
-   vertices[num++] = (float) y2;
+   vertices[num++] = x1;
+   vertices[num++] = y2;

-   vertices[num++] = (float) x2;
-   vertices[num++] = (float) y2;
+   vertices[num++] = x2;
+   vertices[num++] = y2;

-   vertices[num++] = (float) x2;
-   vertices[num++] = (float) y1;
+   vertices[num++] = x2;
+   vertices[num++] = y1;

   hud->bg.num_vertices += num/2;
 }
@@ -202,25 +200,25 @@ hud_draw_string(struct hud_context *hud, unsigned x, unsigned y,

      assert(hud->text.num_vertices + num/4 + 4 <= hud->text.max_num_vertices);

-      vertices[num++] = (float) x1;
-      vertices[num++] = (float) y1;
-      vertices[num++] = (float) tx1;
-      vertices[num++] = (float) ty1;
+      vertices[num++] = x1;
+      vertices[num++] = y1;
+      vertices[num++] = tx1;
+      vertices[num++] = ty1;

-      vertices[num++] = (float) x1;
-      vertices[num++] = (float) y2;
-      vertices[num++] = (float) tx1;
-      vertices[num++] = (float) ty2;
+      vertices[num++] = x1;
+      vertices[num++] = y2;
+      vertices[num++] = tx1;
+      vertices[num++] = ty2;

-      vertices[num++] = (float) x2;
-      vertices[num++] = (float) y2;
-      vertices[num++] = (float) tx2;
-      vertices[num++] = (float) ty2;
+      vertices[num++] = x2;
+      vertices[num++] = y2;
+      vertices[num++] = tx2;
+      vertices[num++] = ty2;

-      vertices[num++] = (float) x2;
-      vertices[num++] = (float) y1;
-      vertices[num++] = (float) tx2;
-      vertices[num++] = (float) ty1;
+      vertices[num++] = x2;
+      vertices[num++] = y1;
+      vertices[num++] = tx2;
+      vertices[num++] = ty1;

      x += hud->font.glyph_width;
      s++;
@@ -318,25 +316,25 @@ hud_pane_accumulate_vertices(struct hud_context *hud,

   /* draw border */
   assert(hud->whitelines.num_vertices + num/2 + 8 <= hud->whitelines.max_num_vertices);
-   line_verts[num++] = (float) pane->x1;
-   line_verts[num++] = (float) pane->y1;
-   line_verts[num++] = (float) pane->x2;
-   line_verts[num++] = (float) pane->y1;
+   line_verts[num++] = pane->x1;
+   line_verts[num++] = pane->y1;
+   line_verts[num++] = pane->x2;
+   line_verts[num++] = pane->y1;

-   line_verts[num++] = (float) pane->x2;
-   line_verts[num++] = (float) pane->y1;
-   line_verts[num++] = (float) pane->x2;
-   line_verts[num++] = (float) pane->y2;
+   line_verts[num++] = pane->x2;
+   line_verts[num++] = pane->y1;
+   line_verts[num++] = pane->x2;
+   line_verts[num++] = pane->y2;

-   line_verts[num++] = (float) pane->x1;
-   line_verts[num++] = (float) pane->y2;
-   line_verts[num++] = (float) pane->x2;
-   line_verts[num++] = (float) pane->y2;
+   line_verts[num++] = pane->x1;
+   line_verts[num++] = pane->y2;
+   line_verts[num++] = pane->x2;
+   line_verts[num++] = pane->y2;

-   line_verts[num++] = (float) pane->x1;
-   line_verts[num++] = (float) pane->y1;
-   line_verts[num++] = (float) pane->x1;
-   line_verts[num++] = (float) pane->y2;
+   line_verts[num++] = pane->x1;
+   line_verts[num++] = pane->y1;
+   line_verts[num++] = pane->x1;
+   line_verts[num++] = pane->y2;

   /* draw horizontal lines inside the graph */
   for (i = 0; i <= 5; i++) {
@@ -407,8 +405,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)

   hud->fb_width = tex->width0;
   hud->fb_height = tex->height0;
-   hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
-   hud->constants.two_div_fb_height = 2.0f / hud->fb_height;
+   hud->constants.two_div_fb_width = 2.0 / hud->fb_width;
+   hud->constants.two_div_fb_height = 2.0 / hud->fb_height;

   cso_save_framebuffer(cso);
   cso_save_sample_mask(cso);
@@ -458,7 +456,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
   cso_set_geometry_shader_handle(cso, NULL);
   cso_set_vertex_shader_handle(cso, hud->vs);
   cso_set_vertex_elements(cso, 2, hud->velems);
-   cso_set_render_condition(cso, NULL, FALSE, 0);
+   cso_set_render_condition(cso, NULL, 0);
   cso_set_sampler_views(cso, PIPE_SHADER_FRAGMENT, 1,
                         &hud->font_sampler_view);
   cso_set_samplers(cso, PIPE_SHADER_FRAGMENT, 1, sampler_states);
@@ -488,7 +486,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
      hud->constants.color[0] = 0;
      hud->constants.color[1] = 0;
      hud->constants.color[2] = 0;
-      hud->constants.color[3] = 0.666f;
+      hud->constants.color[3] = 0.666;
      hud->constants.translate[0] = 0;
      hud->constants.translate[1] = 0;
      hud->constants.scale[0] = 1;
@@ -564,7 +562,7 @@ void
 hud_pane_set_max_value(struct hud_pane *pane, uint64_t value)
 {
   pane->max_value = value;
-   pane->yscale = -(int)pane->inner_height / (float)pane->max_value;
+   pane->yscale = -(int)pane->inner_height / (double)pane->max_value;
 }

 static struct hud_pane *
@@ -636,8 +634,8 @@ hud_graph_add_value(struct hud_graph *gr, uint64_t value)
      gr->vertices[1] = gr->vertices[(gr->index-1)*2+1];
      gr->index = 1;
   }
-   gr->vertices[(gr->index)*2+0] = (float) (gr->index * 2);
-   gr->vertices[(gr->index)*2+1] = (float) value;
+   gr->vertices[(gr->index)*2+0] = gr->index*2;
+   gr->vertices[(gr->index)*2+1] = value;
   gr->index++;

   if (gr->num_vertices < gr->pane->max_num_vertices) {
@@ -717,8 +715,8 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
    */
   period_env = getenv("GALLIUM_HUD_PERIOD");
   if (period_env) {
-      float p = (float) atof(period_env);
-      if (p >= 0.0f) {
+      float p = atof(period_env);
+      if (p >= 0.0) {
         period = (unsigned) (p * 1000 * 1000);
      }
   }
@@ -961,8 +959,7 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
   hud->fs_color =
         util_make_fragment_passthrough_shader(pipe,
                                               TGSI_SEMANTIC_COLOR,
-                                               TGSI_INTERPOLATE_CONSTANT,
-                                               TRUE);
+                                               TGSI_INTERPOLATE_CONSTANT);

   {
      /* Read a texture and do .xxxx swizzling. */
--- a/src/gallium/auxiliary/hud/hud_cpu.c
+++ b/src/gallium/auxiliary/hud/hud_cpu.c
@@ -116,12 +116,6 @@ query_cpu_load(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   FREE(p);
-}
-
 void
 hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index)
 {
@@ -150,11 +144,7 @@ hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index)
   }

   gr->query_new_value = query_cpu_load;
-
-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
+   gr->free_query_data = free;

   info = gr->query_data;
   info->cpu_index = cpu_index;
--- a/src/gallium/auxiliary/hud/hud_fps.c
+++ b/src/gallium/auxiliary/hud/hud_fps.c
@@ -52,7 +52,7 @@ query_fps(struct hud_graph *gr)
         info->frames = 0;
         info->last_time = now;

-         hud_graph_add_value(gr, (uint64_t) fps);
+         hud_graph_add_value(gr, fps);
      }
   }
   else {
@@ -60,12 +60,6 @@ query_fps(struct hud_graph *gr)
   }
 }

-static void
-free_query_data(void *p)
-{
-   FREE(p);
-}
-
 void
 hud_fps_graph_install(struct hud_pane *pane)
 {
@@ -82,11 +76,7 @@ hud_fps_graph_install(struct hud_pane *pane)
   }

   gr->query_new_value = query_fps;
-
-   /* Don't use free() as our callback as that messes up Gallium's
-    * memory debugger.  Use simple free_query_data() wrapper.
-    */
-   gr->free_query_data = free_query_data;
+   gr->free_query_data = free;

   hud_pane_add_graph(pane, gr);
 }
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -42,7 +42,7 @@ struct hud_graph {
   char name[128];
   void *query_data;
   void (*query_new_value)(struct hud_graph *gr);
-   void (*free_query_data)(void *ptr); /**< do not use ordinary free() */
+   void (*free_query_data)(void *ptr);

   /* mutable variables */
   unsigned num_vertices;
--- a/src/gallium/auxiliary/indices/u_indices.c
+++ b/src/gallium/auxiliary/indices/u_indices.c
@@ -150,26 +150,9 @@ int u_index_translator( unsigned hw_mask,
 }


-/**
- * If a driver does not support a particular gallium primitive type
- * (such as PIPE_PRIM_QUAD_STRIP) this function can be used to help
- * convert the primitive into a simpler type (like PIPE_PRIM_TRIANGLES).
- *
- * The generator functions generates a number of ushort or uint indexes
- * for drawing the new type of primitive.
- *
- * \param hw_mask  a bitmask of (1 << PIPE_PRIM_x) values that indicates
- *                 kind of primitives are supported by the driver.
- * \param prim  the PIPE_PRIM_x that the user wants to draw
- * \param start  index of first vertex to draw
- * \param nr  number of vertices to draw
- * \param in_pv  user's provoking vertex (PV_FIRST/LAST)
- * \param out_pv  desired proking vertex for the hardware (PV_FIRST/LAST)
- * \param out_prim  returns the new primitive type for the driver
- * \param out_index_size  returns OUT_USHORT or OUT_UINT
- * \param out_nr  returns new number of vertices to draw
- * \param out_generate  returns pointer to the generator function
- */
+
+
+
 int u_index_generator( unsigned hw_mask,
                       unsigned prim,
                       unsigned start,
--- a/src/gallium/auxiliary/indices/u_unfilled_indices.c
+++ b/src/gallium/auxiliary/indices/u_unfilled_indices.c
@@ -151,14 +151,7 @@ int u_unfilled_translator( unsigned prim,
 }


-/**
- * Utility for converting unfilled polygons into points, lines, triangles.
- * Few drivers have direct support for OpenGL's glPolygonMode.
- * This function helps with converting triangles into points or lines
- * when the front and back fill modes are the same.  When there's
- * different front/back fill modes, that can be handled with the
- * 'draw' module.
- */
+
 int u_unfilled_generator( unsigned prim,
                          unsigned start,
                          unsigned nr,
--- a/src/gallium/auxiliary/os/os_process.c
+++ b/src/gallium/auxiliary/os/os_process.c
@@ -1,92 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2013 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "pipe/p_config.h"
-#include "os/os_process.h"
-#include "util/u_memory.h"
-
-#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-#  include <windows.h>
-#elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE)
-#  include <stdlib.h>
-#elif defined(__GLIBC__)
-#  include <errno.h>
-#else
-#warning unexpected platform in os_process.c
-#endif
-
-
-/**
- * Return the name of the current process.
- * \param procname  returns the process name
- * \param size  size of the procname buffer
- * \return  TRUE or FALSE for success, failure
- */
-boolean
-os_get_process_name(char *procname, size_t size)
-{
-   const char *name;
-#if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-   char szProcessPath[MAX_PATH];
-   char *lpProcessName;
-   char *lpProcessExt;
-
-   GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));
-
-   lpProcessName = strrchr(szProcessPath, '\\');
-   lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
-
-   lpProcessExt = strrchr(lpProcessName, '.');
-   if (lpProcessExt) {
-      *lpProcessExt = '\0';
-   }
-
-   name = lpProcessName;
-
-#elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE)
-   /* *BSD and OS X */
-   name = getprogname();
-#elif defined(__GLIBC__)
-   name = program_invocation_short_name;
-#else
-#warning unexpected platform in os_process.c
-   return FALSE;
-#endif
-
-   assert(size > 0);
-   assert(procname);
-
-   if (name && procname && size > 0) {
-      strncpy(procname, name, size);
-      procname[size - 1] = '\0';
-      return TRUE;
-   }
-   else {
-      return FALSE;
-   }
-}
--- a/src/gallium/auxiliary/os/os_process.h
+++ b/src/gallium/auxiliary/os/os_process.h
@@ -1,40 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2013 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef OS_PROCESS_H
-#define OS_PROCESS_H
-
-
-#include "pipe/p_compiler.h"
-
-
-extern boolean
-os_get_process_name(char *str, size_t size);
-
-
-#endif /* OS_PROCESS_H */
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -84,7 +84,7 @@ os_time_get_nano(void)
 void
 os_time_sleep(int64_t usecs)
 {
-   DWORD dwMilliseconds = (DWORD) ((usecs + 999) / 1000);
+   DWORD dwMilliseconds = (usecs + 999) / 1000;
   /* Avoid Sleep(O) as that would cause to sleep for an undetermined duration */
   if (dwMilliseconds) {
      Sleep(dwMilliseconds);
--- a/src/gallium/auxiliary/postprocess/filters.h
+++ b/src/gallium/auxiliary/postprocess/filters.h
@@ -30,9 +30,8 @@

 #include "postprocess/postprocess.h"

-typedef bool (*pp_init_func) (struct pp_queue_t *, unsigned int,
+typedef void (*pp_init_func) (struct pp_queue_t *, unsigned int,
                              unsigned int);
-typedef void (*pp_free_func) (struct pp_queue_t *, unsigned int);

 struct pp_filter_t
 {
@@ -42,19 +41,18 @@ struct pp_filter_t
   unsigned int verts;          /* How many are vertex shaders */
   pp_init_func init;           /* Init function */
   pp_func main;                /* Run function */
-   pp_free_func free;           /* Free function */
 };

 /*	Order matters. Put new filters in a suitable place. */

 static const struct pp_filter_t pp_filters[PP_FILTERS] = {
-/*    name			inner	shaders	verts	init			run                       free   */
-   { "pp_noblue",		0,	2,	1,	pp_noblue_init,		pp_nocolor,               pp_nocolor_free },
-   { "pp_nogreen",		0,	2,	1,	pp_nogreen_init,	pp_nocolor,               pp_nocolor_free },
-   { "pp_nored",		0,	2,	1,	pp_nored_init,		pp_nocolor,               pp_nocolor_free },
-   { "pp_celshade",		0,	2,	1,	pp_celshade_init,	pp_nocolor,               pp_celshade_free },
-   { "pp_jimenezmlaa",		2,	5,	2,	pp_jimenezmlaa_init,	pp_jimenezmlaa,           pp_jimenezmlaa_free },
-   { "pp_jimenezmlaa_color",	2,	5,	2,	pp_jimenezmlaa_init_color, pp_jimenezmlaa_color,  pp_jimenezmlaa_free },
+/*    name			inner	shaders	verts	init			run */
+   { "pp_noblue",		0,	2,	1,	pp_noblue_init,		pp_nocolor },
+   { "pp_nogreen",		0,	2,	1,	pp_nogreen_init,	pp_nocolor },
+   { "pp_nored",		0,	2,	1,	pp_nored_init,		pp_nocolor },
+   { "pp_celshade",		0,	2,	1,	pp_celshade_init,	pp_nocolor },
+   { "pp_jimenezmlaa",		2,	5,	2,	pp_jimenezmlaa_init,	pp_jimenezmlaa },
+   { "pp_jimenezmlaa_color",	2,	5,	2,	pp_jimenezmlaa_init_color, pp_jimenezmlaa_color },
 };

 #endif
--- a/src/gallium/auxiliary/postprocess/postprocess.h
+++ b/src/gallium/auxiliary/postprocess/postprocess.h
@@ -53,13 +53,11 @@ struct pp_queue_t

   struct pipe_resource *depth; /* depth of original input */
   struct pipe_resource *stencil;       /* stencil shared by inner_tmps */
-   struct pipe_resource *constbuf;      /* MLAA constant buffer */
-   struct pipe_resource *areamaptex;    /* MLAA area map texture */

   struct pipe_surface *tmps[2], *inner_tmps[3], *stencils;

   void ***shaders;             /* Shaders in TGSI form */
-   unsigned int *filters;       /* Active filter to filters.h mapping. */
+   unsigned int *verts;
   struct program *p;

   bool fbos_init;
@@ -77,14 +75,6 @@ void pp_debug(const char *, ...);
 struct program *pp_init_prog(struct pp_queue_t *, struct pipe_context *pipe,
                             struct cso_context *);
 void pp_init_fbos(struct pp_queue_t *, unsigned int, unsigned int);
-void pp_blit(struct pipe_context *pipe,
-             struct pipe_resource *src_tex,
-             int srcX0, int srcY0,
-             int srcX1, int srcY1,
-             int srcZ0,
-             struct pipe_surface *dst,
-             int dstX0, int dstY0,
-             int dstX1, int dstY1);

 /* The filters */

@@ -98,20 +88,14 @@ void pp_jimenezmlaa_color(struct pp_queue_t *, struct pipe_resource *,

 /* The filter init functions */

-bool pp_celshade_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_celshade_init(struct pp_queue_t *, unsigned int, unsigned int);

-bool pp_nored_init(struct pp_queue_t *, unsigned int, unsigned int);
-bool pp_nogreen_init(struct pp_queue_t *, unsigned int, unsigned int);
-bool pp_noblue_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_nored_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_nogreen_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_noblue_init(struct pp_queue_t *, unsigned int, unsigned int);

-bool pp_jimenezmlaa_init(struct pp_queue_t *, unsigned int, unsigned int);
-bool pp_jimenezmlaa_init_color(struct pp_queue_t *, unsigned int,
+void pp_jimenezmlaa_init(struct pp_queue_t *, unsigned int, unsigned int);
+void pp_jimenezmlaa_init_color(struct pp_queue_t *, unsigned int,
                               unsigned int);

-/* The filter free functions */
-
-void pp_celshade_free(struct pp_queue_t *, unsigned int);
-void pp_nocolor_free(struct pp_queue_t *, unsigned int);
-void pp_jimenezmlaa_free(struct pp_queue_t *, unsigned int);
-
 #endif
--- a/src/gallium/auxiliary/postprocess/pp_celshade.c
+++ b/src/gallium/auxiliary/postprocess/pp_celshade.c
@@ -30,17 +30,9 @@
 #include "postprocess/pp_filters.h"

 /** Init function */
-bool
+void
 pp_celshade_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
 {
   ppq->shaders[n][1] =
      pp_tgsi_to_state(ppq->p->pipe, celshade, false, "celshade");
-
-   return (ppq->shaders[n][1] != NULL) ? TRUE : FALSE;
-}
-
-/** Free function */
-void
-pp_celshade_free(struct pp_queue_t *ppq, unsigned int n)
-{
 }
--- a/src/gallium/auxiliary/postprocess/pp_colors.c
+++ b/src/gallium/auxiliary/postprocess/pp_colors.c
@@ -57,37 +57,24 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,

 /* Init functions */

-bool
+void
 pp_nored_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
 {
-   ppq->shaders[n][1] =
-      pp_tgsi_to_state(ppq->p->pipe, nored, false, "nored");
-
-   return (ppq->shaders[n][1] != NULL) ? TRUE : FALSE;
+   ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, nored, false, "nored");
 }


-bool
+void
 pp_nogreen_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
 {
   ppq->shaders[n][1] =
      pp_tgsi_to_state(ppq->p->pipe, nogreen, false, "nogreen");
-
-   return (ppq->shaders[n][1] != NULL) ? TRUE : FALSE;
 }


-bool
+void
 pp_noblue_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
 {
   ppq->shaders[n][1] =
      pp_tgsi_to_state(ppq->p->pipe, noblue, false, "noblue");
-
-   return (ppq->shaders[n][1] != NULL) ? TRUE : FALSE;
-}
-
-/* Free functions */
-void
-pp_nocolor_free(struct pp_queue_t *ppq, unsigned int n)
-{
 }
--- a/src/gallium/auxiliary/postprocess/pp_init.c
+++ b/src/gallium/auxiliary/postprocess/pp_init.c
@@ -31,6 +31,7 @@

 #include "pipe/p_screen.h"
 #include "util/u_inlines.h"
+#include "util/u_blit.h"
 #include "util/u_math.h"
 #include "util/u_debug.h"
 #include "util/u_memory.h"
@@ -41,75 +42,58 @@ struct pp_queue_t *
 pp_init(struct pipe_context *pipe, const unsigned int *enabled,
        struct cso_context *cso)
 {
-   unsigned int num_filters = 0;
+
   unsigned int curpos = 0, i, tmp_req = 0;
   struct pp_queue_t *ppq;
+   pp_func *tmp_q;

   pp_debug("Initializing the post-processing queue.\n");

   /* How many filters were requested? */
   for (i = 0; i < PP_FILTERS; i++) {
      if (enabled[i])
-         num_filters++;
+         curpos++;
   }
-   if (num_filters == 0)
+   if (!curpos)
      return NULL;

   ppq = CALLOC(1, sizeof(struct pp_queue_t));
+   tmp_q = CALLOC(curpos, sizeof(pp_func));
+   ppq->shaders = CALLOC(curpos, sizeof(void *));
+   ppq->verts = CALLOC(curpos, sizeof(unsigned int));

-   if (ppq == NULL) {
-      pp_debug("Unable to allocate memory for ppq.\n");
+   if (!tmp_q || !ppq || !ppq->shaders || !ppq->verts)
      goto error;
-   }
-
-   ppq->pp_queue = CALLOC(num_filters, sizeof(pp_func));
-   if (ppq->pp_queue == NULL) {
-      pp_debug("Unable to allocate memory for pp_queue.\n");
-      goto error;
-   }
-
-   ppq->shaders = CALLOC(num_filters, sizeof(void *));
-   ppq->filters = CALLOC(num_filters, sizeof(unsigned int));
-
-   if ((ppq->shaders == NULL) ||
-       (ppq->filters == NULL)) {
-      pp_debug("Unable to allocate memory for shaders and filter arrays.\n");
-      goto error;
-   }

   ppq->p = pp_init_prog(ppq, pipe, cso);
-   if (ppq->p == NULL) {
-      pp_debug("pp_init_prog returned NULL.\n");
+   if (!ppq->p)
      goto error;
-   }

   /* Add the enabled filters to the queue, in order */
   curpos = 0;
+   ppq->pp_queue = tmp_q;
   for (i = 0; i < PP_FILTERS; i++) {
      if (enabled[i]) {
         ppq->pp_queue[curpos] = pp_filters[i].main;
         tmp_req = MAX2(tmp_req, pp_filters[i].inner_tmps);
-         ppq->filters[curpos] = i;

         if (pp_filters[i].shaders) {
            ppq->shaders[curpos] =
               CALLOC(pp_filters[i].shaders + 1, sizeof(void *));
-            if (!ppq->shaders[curpos]) {
-               pp_debug("Unable to allocate memory for shader list.\n");
+            ppq->verts[curpos] = pp_filters[i].verts;
+            if (!ppq->shaders[curpos])
               goto error;
-            }
         }
-
-         /* Call the initialization function for the filter. */
-         if (!pp_filters[i].init(ppq, curpos, enabled[i])) {
-            pp_debug("Initialization for filter %u failed.\n", i);
-            goto error;
-         }           
+         pp_filters[i].init(ppq, curpos, enabled[i]);

         curpos++;
      }
   }

+   ppq->p->blitctx = util_create_blit(ppq->p->pipe, cso);
+   if (!ppq->p->blitctx)
+      goto error;
+
   ppq->n_filters = curpos;
   ppq->n_tmp = (curpos > 2 ? 2 : 1);
   ppq->n_inner_tmp = tmp_req;
@@ -120,18 +104,16 @@ pp_init(struct pipe_context *pipe, const unsigned int *enabled,
      ppq->shaders[i][0] = ppq->p->passvs;

   pp_debug("Queue successfully allocated. %u filter(s).\n", curpos);
-   
+
   return ppq;

 error:
+   pp_debug("Error setting up pp\n");

-   if (ppq) {
-      /* Assign curpos, since we only need to destroy initialized filters. */
-      ppq->n_filters = curpos;
-
-      /* Call the common free function which must handle partial initialization. */
-      pp_free(ppq);
-   }
+   if (ppq)
+      FREE(ppq->p);
+   FREE(ppq);
+   FREE(tmp_q);

   return NULL;
 }
@@ -160,76 +142,33 @@ pp_free_fbos(struct pp_queue_t *ppq)
   ppq->fbos_init = false;
 }

-/** 
- * Free the pp queue. Called on context termination and failure in
- * pp_init.
- */
+/** Free the pp queue. Called on context termination. */
 void
 pp_free(struct pp_queue_t *ppq)
 {
+
   unsigned int i, j;

   pp_free_fbos(ppq);

-   if (ppq && ppq->p) {
-      if (ppq->p->pipe && ppq->filters && ppq->shaders) {
-         for (i = 0; i < ppq->n_filters; i++) {
-            unsigned int filter = ppq->filters[i];
+   util_destroy_blit(ppq->p->blitctx);

-            if (ppq->shaders[i] == NULL) {
-               continue;
-            }
-
-            /*
-             * Common shader destruction code for all postprocessing
-             * filters.
-             */
-            for (j = 0; j < pp_filters[filter].shaders; j++) {
-               if (ppq->shaders[i][j] == NULL) {
-                  /* We reached the end of initialized shaders. */
-                  break;
-               }
-
-               if (ppq->shaders[i][j] == ppq->p->passvs) {
-                  continue;
-               }
-
-               assert(ppq);
-               assert(ppq->p);
-               assert(ppq->p->pipe);
- 
-               if (j >= pp_filters[filter].verts) {
-                  assert(ppq->p->pipe->delete_fs_state);
-                  ppq->p->pipe->delete_fs_state(ppq->p->pipe,
-                                                ppq->shaders[i][j]);
-                  ppq->shaders[i][j] = NULL;
-               } else {
-                  assert(ppq->p->pipe->delete_vs_state);
-                  ppq->p->pipe->delete_vs_state(ppq->p->pipe,
-                                                ppq->shaders[i][j]);
-                  ppq->shaders[i][j] = NULL;
-               }
-            }
-
-            /* Finally call each filter type's free functionality. */
-            pp_filters[filter].free(ppq, i);
+   for (i = 0; i < ppq->n_filters; i++) {
+      for (j = 0; j < PP_MAX_PASSES && ppq->shaders[i][j]; j++) {
+         if (j >= ppq->verts[i]) {
+            ppq->p->pipe->delete_fs_state(ppq->p->pipe, ppq->shaders[i][j]);
+            ppq->shaders[i][j] = NULL;
+         }
+         else if (ppq->shaders[i][j] != ppq->p->passvs) {
+            ppq->p->pipe->delete_vs_state(ppq->p->pipe, ppq->shaders[i][j]);
+            ppq->shaders[i][j] = NULL;
         }
      }
-
-      FREE(ppq->p);
   }

-   if (ppq) {
-      /*
-       * Handle partial initialization for common resource destruction
-       * in the create path.
-       */
-      FREE(ppq->filters);
-      FREE(ppq->shaders);
-      FREE(ppq->pp_queue);
-  
-      FREE(ppq);
-   }
+   FREE(ppq->p);
+   FREE(ppq->pp_queue);
+   FREE(ppq);

   pp_debug("Queue taken down.\n");
 }
@@ -317,6 +256,7 @@ pp_init_fbos(struct pp_queue_t *ppq, unsigned int w,
   if (!ppq->stencil || !ppq->stencils)
      goto error;

+
   p->framebuffer.width = w;
   p->framebuffer.height = h;

--- a/src/gallium/auxiliary/postprocess/pp_mlaa.c
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -43,6 +43,7 @@
 #include "postprocess/postprocess.h"
 #include "postprocess/pp_mlaa.h"
 #include "postprocess/pp_filters.h"
+#include "util/u_blit.h"
 #include "util/u_box.h"
 #include "util/u_sampler.h"
 #include "util/u_inlines.h"
@@ -55,16 +56,16 @@
 static float constants[] = { 1, 1, 0, 0 };
 static unsigned int dimensions[2] = { 0, 0 };

+static struct pipe_resource *constbuf, *areamaptex;
+
 /** Upload the constants. */
 static void
-up_consts(struct pp_queue_t *ppq)
+up_consts(struct pipe_context *pipe)
 {
-   struct pipe_context *pipe = ppq->p->pipe;
   struct pipe_box box;

   u_box_2d(0, 0, sizeof(constants), 1, &box);
-
-   pipe->transfer_inline_write(pipe, ppq->constbuf, 0, PIPE_TRANSFER_WRITE,
+   pipe->transfer_inline_write(pipe, constbuf, 0, PIPE_TRANSFER_WRITE,
                               &box, constants, sizeof(constants),
                               sizeof(constants));
 }
@@ -80,24 +81,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
   struct pipe_depth_stencil_alpha_state mstencil;
   struct pipe_sampler_view v_tmp, *arr[3];

-   unsigned int w = 0;
-   unsigned int h = 0;
+   unsigned int w = p->framebuffer.width;
+   unsigned int h = p->framebuffer.height;

   const struct pipe_stencil_ref ref = { {1} };
-
-   /* Insufficient initialization checks. */
-   assert(p);
-   assert(ppq);
-   assert(ppq->constbuf);
-   assert(ppq->areamaptex);
-   assert(ppq->inner_tmp);
-   assert(ppq->shaders[n]);
-
-   w = p->framebuffer.width;
-   h = p->framebuffer.height;
-
   memset(&mstencil, 0, sizeof(mstencil));
-
   cso_set_stencil_ref(p->cso, &ref);

   /* Init the pixel size constant */
@@ -106,15 +94,13 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
      constants[0] = 1.0f / p->framebuffer.width;
      constants[1] = 1.0f / p->framebuffer.height;

-      up_consts(ppq);
+      up_consts(p->pipe);
      dimensions[0] = p->framebuffer.width;
      dimensions[1] = p->framebuffer.height;
   }

-   cso_set_constant_buffer_resource(p->cso, PIPE_SHADER_VERTEX,
-                                    0, ppq->constbuf);
-   cso_set_constant_buffer_resource(p->cso, PIPE_SHADER_FRAGMENT,
-                                    0, ppq->constbuf);
+   cso_set_constant_buffer_resource(p->cso, PIPE_SHADER_VERTEX, 0, constbuf);
+   cso_set_constant_buffer_resource(p->cso, PIPE_SHADER_FRAGMENT, 0, constbuf);

   mstencil.stencil[0].enabled = 1;
   mstencil.stencil[0].valuemask = mstencil.stencil[0].writemask = ~0;
@@ -156,7 +142,7 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
   mstencil.stencil[0].zpass_op = PIPE_STENCIL_OP_KEEP;
   cso_set_depth_stencil_alpha(p->cso, &mstencil);

-   pp_filter_setup_in(p, ppq->areamaptex);
+   pp_filter_setup_in(p, areamaptex);
   pp_filter_setup_out(p, ppq->inner_tmp[1]);

   u_sampler_view_default_template(&v_tmp, ppq->inner_tmp[0],
@@ -190,9 +176,10 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
   pp_filter_set_fb(p);

   /* Blit the input to the output */
-   pp_blit(p->pipe, in, 0, 0,
-           w, h, 0, p->framebuffer.cbufs[0],
-           0, 0, w, h);
+   util_blit_pixels(p->blitctx, in, 0, 0, 0,
+                    w, h, 0, p->framebuffer.cbufs[0],
+                    0, 0, w, h, 0, PIPE_TEX_MIPFILTER_NEAREST,
+                    TGSI_WRITEMASK_XYZW, 0);

   u_sampler_view_default_template(&v_tmp, in, in->format);
   arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
@@ -219,34 +206,32 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
 }

 /** The init function of the MLAA filter. */
-static bool
+static void
 pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n,
                        unsigned int val, bool iscolor)
 {

   struct pipe_box box;
   struct pipe_resource res;
-   char *tmp_text = NULL;
+   char *tmp_text;
+
+   constbuf = pipe_buffer_create(ppq->p->screen, PIPE_BIND_CONSTANT_BUFFER,
+                                 PIPE_USAGE_STATIC, sizeof(constants));
+   if (!constbuf) {
+      pp_debug("Failed to allocate constant buffer\n");
+      return;
+   }
+
+
+   pp_debug("mlaa: using %u max search steps\n", val);

   tmp_text = CALLOC(sizeof(blend2fs_1) + sizeof(blend2fs_2) +
                     IMM_SPACE, sizeof(char));

-   if (tmp_text == NULL) {
+   if (!tmp_text) {
      pp_debug("Failed to allocate shader space\n");
-      return FALSE;
+      return;
   }
-
-   ppq->constbuf = pipe_buffer_create(ppq->p->screen,
-                                      PIPE_BIND_CONSTANT_BUFFER,
-                                      PIPE_USAGE_STATIC,
-                                      sizeof(constants));
-   if (ppq->constbuf == NULL) {
-      pp_debug("Failed to allocate constant buffer\n");
-      goto fail;
-   }
-
-   pp_debug("mlaa: using %u max search steps\n", val);
-
   util_sprintf(tmp_text, "%s"
                "IMM FLT32 {    %.8f,     0.0000,     0.0000,     0.0000}\n"
                "%s\n", blend2fs_1, (float) val, blend2fs_2);
@@ -264,19 +249,15 @@ pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n,
                                            res.target, 1, res.bind))
      pp_debug("Areamap format not supported\n");

-   ppq->areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res);
-   
-   if (ppq->areamaptex == NULL) {
-      pp_debug("Failed to allocate area map texture\n");
-      goto fail;
-   }
-   
+   areamaptex = ppq->p->screen->resource_create(ppq->p->screen, &res);
   u_box_2d(0, 0, 165, 165, &box);

-   ppq->p->pipe->transfer_inline_write(ppq->p->pipe, ppq->areamaptex, 0,
+   ppq->p->pipe->transfer_inline_write(ppq->p->pipe, areamaptex, 0,
                                       PIPE_TRANSFER_WRITE, &box,
                                       areamap, 165 * 2, sizeof(areamap));

+
+
   ppq->shaders[n][1] = pp_tgsi_to_state(ppq->p->pipe, offsetvs, true,
                                         "offsetvs");
   if (iscolor)
@@ -291,35 +272,23 @@ pp_jimenezmlaa_init_run(struct pp_queue_t *ppq, unsigned int n,
                                         "neigh3fs");

   FREE(tmp_text);
-
-   return TRUE;
-
- fail:
-   
-   FREE(tmp_text);
-
-   /*
-    * Call the common free function for destruction of partially initialized
-    * resources.
-    */
-   pp_jimenezmlaa_free(ppq, n);
-
-   return FALSE;
 }

 /** Short wrapper to init the depth version. */
-bool
+void
 pp_jimenezmlaa_init(struct pp_queue_t *ppq, unsigned int n, unsigned int val)
 {
-   return pp_jimenezmlaa_init_run(ppq, n, val, false);
+
+   pp_jimenezmlaa_init_run(ppq, n, val, false);
 }

 /** Short wrapper to init the color version. */
-bool
+void
 pp_jimenezmlaa_init_color(struct pp_queue_t *ppq, unsigned int n,
                          unsigned int val)
 {
-   return pp_jimenezmlaa_init_run(ppq, n, val, true);
+
+   pp_jimenezmlaa_init_run(ppq, n, val, true);
 }

 /** Short wrapper to run the depth version. */
@@ -337,23 +306,3 @@ pp_jimenezmlaa_color(struct pp_queue_t *ppq, struct pipe_resource *in,
 {
   pp_jimenezmlaa_run(ppq, in, out, n, true);
 }
-
-
-/**
- * Short wrapper to free the mlaa filter resources. Shaders are freed in
- * the common code in pp_free.
- */
-void
-pp_jimenezmlaa_free(struct pp_queue_t *ppq, unsigned int n)
-{
-   if (ppq->areamaptex) {
-      ppq->p->screen->resource_destroy(ppq->p->screen, ppq->areamaptex);
-      ppq->areamaptex = NULL;
-   }
-
-   if (ppq->constbuf) {
-      ppq->p->screen->resource_destroy(ppq->p->screen, ppq->constbuf);
-      ppq->constbuf = NULL;
-   }
-}
-
--- a/src/gallium/auxiliary/postprocess/pp_mlaa.h
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.h
@@ -67,7 +67,7 @@ static const char depth1fs[] = "FRAG\n"
   " 12: DP4 TEMP[0].x, TEMP[2], IMM[0].zzzz\n"
   " 13: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy\n"
   " 14: IF TEMP[1].xxxx :16\n"
-   " 15:   KILL\n"
+   " 15:   KILP\n"
   " 16: ENDIF\n"
   " 17: MOV OUT[0], TEMP[2]\n"
   " 18: END\n";
@@ -99,7 +99,7 @@ static const char color1fs[] = "FRAG\n"
   " 13: DP4 TEMP[0].x, TEMP[2], IMM[1].xxxx\n"
   " 14: SEQ TEMP[1].x, TEMP[0].xxxx, IMM[1].yyyy\n"
   " 15: IF TEMP[1].xxxx :17\n"
-   " 16:   KILL\n"
+   " 16:   KILP\n"
   " 17: ENDIF\n"
   " 18: MOV OUT[0], TEMP[2]\n"
   " 19: END\n";
@@ -126,7 +126,7 @@ static const char neigh3fs[] = "FRAG\n"
   "  8: DP4 TEMP[1].x, TEMP[5], IMM[0].xxxx\n"
   "  9: SLT TEMP[4].x, TEMP[1].xxxx, IMM[0].yyyy\n"
   " 10: IF TEMP[4].xxxx :12\n"
-   " 11:   KILL\n"
+   " 11:   KILP\n"
   " 12: ENDIF\n"
   " 13: TEX TEMP[4], IN[0].xyyy, SAMP[0], 2D\n"
   " 14: TEX TEMP[6], IN[1].zwww, SAMP[0], 2D\n"
--- a/Show More
+++ b/Show More